Re: Linux 3.4.20

From: Greg KH
Date: Mon Nov 26 2012 - 15:25:57 EST


diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 9b1067a..68c5411 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -466,6 +466,10 @@ Note:
5.3 swappiness

Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only.
+Please note that, unlike the global swappiness, a memcg knob set to 0
+really prevents any swapping even if swap storage is available. This
+might invoke the memcg OOM killer if there are no file pages to
+reclaim.

Following cgroups' swappiness can't be changed.
- root cgroup (uses /proc/sys/vm/swappiness).
diff --git a/Makefile b/Makefile
index e264929..9c89559 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
VERSION = 3
PATCHLEVEL = 4
-SUBLEVEL = 19
+SUBLEVEL = 20
EXTRAVERSION =
NAME = Saber-toothed Squirrel

diff --git a/arch/arm/plat-omap/include/plat/omap-serial.h b/arch/arm/plat-omap/include/plat/omap-serial.h
index c369c9d..9ff4444 100644
--- a/arch/arm/plat-omap/include/plat/omap-serial.h
+++ b/arch/arm/plat-omap/include/plat/omap-serial.h
@@ -42,10 +42,10 @@
#define OMAP_UART_WER_MOD_WKUP 0X7F

/* Enable XON/XOFF flow control on output */
-#define OMAP_UART_SW_TX 0x8
+#define OMAP_UART_SW_TX 0x04

/* Enable XON/XOFF flow control on input */
-#define OMAP_UART_SW_RX 0x2
+#define OMAP_UART_SW_RX 0x04

#define OMAP_UART_SYSC_RESET 0X07
#define OMAP_UART_TCR_TRIG 0X0F
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 60e8866..93fe83e 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -156,7 +156,7 @@ typedef struct sigaltstack {
static inline void sigaddset(sigset_t *set, int _sig)
{
asm ("bfset %0{%1,#1}"
- : "+od" (*set)
+ : "+o" (*set)
: "id" ((_sig - 1) ^ 31)
: "cc");
}
@@ -164,7 +164,7 @@ static inline void sigaddset(sigset_t *set, int _sig)
static inline void sigdelset(sigset_t *set, int _sig)
{
asm ("bfclr %0{%1,#1}"
- : "+od" (*set)
+ : "+o" (*set)
: "id" ((_sig - 1) ^ 31)
: "cc");
}
@@ -180,7 +180,7 @@ static inline int __gen_sigismember(sigset_t *set, int _sig)
int ret;
asm ("bfextu %1{%2,#1},%0"
: "=d" (ret)
- : "od" (*set), "id" ((_sig-1) ^ 31)
+ : "o" (*set), "id" ((_sig-1) ^ 31)
: "cc");
return ret;
}
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 234f1d8..2e0a15b 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -20,7 +20,7 @@
#define PSW32_MASK_CC 0x00003000UL
#define PSW32_MASK_PM 0x00000f00UL

-#define PSW32_MASK_USER 0x00003F00UL
+#define PSW32_MASK_USER 0x0000FF00UL

#define PSW32_ADDR_AMODE 0x80000000UL
#define PSW32_ADDR_INSN 0x7FFFFFFFUL
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index aeb77f0..d3750e7 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -240,7 +240,7 @@ typedef struct
#define PSW_MASK_EA 0x00000000UL
#define PSW_MASK_BA 0x00000000UL

-#define PSW_MASK_USER 0x00003F00UL
+#define PSW_MASK_USER 0x0000FF00UL

#define PSW_ADDR_AMODE 0x80000000UL
#define PSW_ADDR_INSN 0x7FFFFFFFUL
@@ -269,7 +269,7 @@ typedef struct
#define PSW_MASK_EA 0x0000000100000000UL
#define PSW_MASK_BA 0x0000000080000000UL

-#define PSW_MASK_USER 0x00003F0180000000UL
+#define PSW_MASK_USER 0x0000FF0180000000UL

#define PSW_ADDR_AMODE 0x0000000000000000UL
#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 28040fd..0bdca3a 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -313,6 +313,10 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
(__u64)(regs32.psw.mask & PSW32_MASK_USER) << 32 |
(__u64)(regs32.psw.addr & PSW32_ADDR_AMODE);
+ /* Check for invalid user address space control. */
+ if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
+ regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
regs->psw.addr = (__u64)(regs32.psw.addr & PSW32_ADDR_INSN);
for (i = 0; i < NUM_GPRS; i++)
regs->gprs[i] = (__u64) regs32.gprs[i];
@@ -494,7 +498,10 @@ static int setup_frame32(int sig, struct k_sigaction *ka,

/* Set up registers for signal handler */
regs->gprs[15] = (__force __u64) frame;
- regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */
+ /* Force 31 bit amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_BA |
+ (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
regs->psw.addr = (__force __u64) ka->sa.sa_handler;

regs->gprs[2] = map_signal(sig);
@@ -562,7 +569,10 @@ static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,

/* Set up registers for signal handler */
regs->gprs[15] = (__force __u64) frame;
- regs->psw.mask |= PSW_MASK_BA; /* force amode 31 */
+ /* Force 31 bit amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_BA |
+ (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
regs->psw.addr = (__u64) ka->sa.sa_handler;

regs->gprs[2] = map_signal(sig);
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index f7582b2..74f58e2 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -148,6 +148,10 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
/* Use regs->psw.mask instead of psw_user_bits to preserve PER bit. */
regs->psw.mask = (regs->psw.mask & ~PSW_MASK_USER) |
(user_sregs.regs.psw.mask & PSW_MASK_USER);
+ /* Check for invalid user address space control. */
+ if ((regs->psw.mask & PSW_MASK_ASC) >= (psw_kernel_bits & PSW_MASK_ASC))
+ regs->psw.mask = (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
/* Check for invalid amode */
if (regs->psw.mask & PSW_MASK_EA)
regs->psw.mask |= PSW_MASK_BA;
@@ -294,7 +298,10 @@ static int setup_frame(int sig, struct k_sigaction *ka,

/* Set up registers for signal handler */
regs->gprs[15] = (unsigned long) frame;
- regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */
+ /* Force default amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+ (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;

regs->gprs[2] = map_signal(sig);
@@ -367,7 +374,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,

/* Set up registers for signal handler */
regs->gprs[15] = (unsigned long) frame;
- regs->psw.mask |= PSW_MASK_EA | PSW_MASK_BA; /* 64 bit amode */
+ /* Force default amode and default user address space control. */
+ regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
+ (psw_user_bits & PSW_MASK_ASC) |
+ (regs->psw.mask & ~PSW_MASK_ASC);
regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;

regs->gprs[2] = map_signal(sig);
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 65cb06e..4ccf9f5 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -183,7 +183,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
addr = start;
len = (unsigned long) nr_pages << PAGE_SHIFT;
end = start + len;
- if (end < start)
+ if ((end < start) || (end > TASK_SIZE))
goto slow_irqon;

/*
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 671d4d6..7bdd61b 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -137,13 +137,18 @@ static void cryptd_queue_worker(struct work_struct *work)
struct crypto_async_request *req, *backlog;

cpu_queue = container_of(work, struct cryptd_cpu_queue, work);
- /* Only handle one request at a time to avoid hogging crypto
- * workqueue. preempt_disable/enable is used to prevent
- * being preempted by cryptd_enqueue_request() */
+ /*
+ * Only handle one request at a time to avoid hogging crypto workqueue.
+ * preempt_disable/enable is used to prevent being preempted by
+ * cryptd_enqueue_request(). local_bh_disable/enable is used to prevent
+ * cryptd_enqueue_request() being accessed from software interrupts.
+ */
+ local_bh_disable();
preempt_disable();
backlog = crypto_get_backlog(&cpu_queue->queue);
req = crypto_dequeue_request(&cpu_queue->queue);
preempt_enable();
+ local_bh_enable();

if (!req)
return;
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 48b5a3c..62d9ee6 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -1345,12 +1345,15 @@ static int
acpi_video_bus_get_devices(struct acpi_video_bus *video,
struct acpi_device *device)
{
- int status;
+ int status = 0;
struct acpi_device *dev;

- status = acpi_video_device_enumerate(video);
- if (status)
- return status;
+ /*
+ * There are systems where the video module is known to work fine
+ * regardless of a broken _DOD, and ignoring the returned value here
+ * doesn't cause any issues later.
+ */
+ acpi_video_device_enumerate(video);

list_for_each_entry(dev, &device->children, node) {

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 013c7a5..7b33136 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -175,8 +175,7 @@ struct rbd_device {
/* protects updating the header */
struct rw_semaphore header_rwsem;
char snap_name[RBD_MAX_SNAP_NAME_LEN];
- u32 cur_snap; /* index+1 of current snapshot within snap context
- 0 - for the head */
+ u64 snap_id; /* current snapshot id */
int read_only;

struct list_head node;
@@ -450,7 +449,9 @@ static void rbd_client_release(struct kref *kref)
struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref);

dout("rbd_release_client %p\n", rbdc);
+ spin_lock(&rbd_client_list_lock);
list_del(&rbdc->node);
+ spin_unlock(&rbd_client_list_lock);

ceph_destroy_client(rbdc->client);
kfree(rbdc->rbd_opts);
@@ -463,9 +464,7 @@ static void rbd_client_release(struct kref *kref)
*/
static void rbd_put_client(struct rbd_device *rbd_dev)
{
- spin_lock(&rbd_client_list_lock);
kref_put(&rbd_dev->rbd_client->kref, rbd_client_release);
- spin_unlock(&rbd_client_list_lock);
rbd_dev->rbd_client = NULL;
}

@@ -498,7 +497,7 @@ static int rbd_header_from_disk(struct rbd_image_header *header,

snap_count = le32_to_cpu(ondisk->snap_count);
header->snapc = kmalloc(sizeof(struct ceph_snap_context) +
- snap_count * sizeof (*ondisk),
+ snap_count * sizeof(u64),
gfp_flags);
if (!header->snapc)
return -ENOMEM;
@@ -552,21 +551,6 @@ err_snapc:
return -ENOMEM;
}

-static int snap_index(struct rbd_image_header *header, int snap_num)
-{
- return header->total_snaps - snap_num;
-}
-
-static u64 cur_snap_id(struct rbd_device *rbd_dev)
-{
- struct rbd_image_header *header = &rbd_dev->header;
-
- if (!rbd_dev->cur_snap)
- return 0;
-
- return header->snapc->snaps[snap_index(header, rbd_dev->cur_snap)];
-}
-
static int snap_by_name(struct rbd_image_header *header, const char *snap_name,
u64 *seq, u64 *size)
{
@@ -605,7 +589,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
snapc->seq = header->snap_seq;
else
snapc->seq = 0;
- dev->cur_snap = 0;
+ dev->snap_id = CEPH_NOSNAP;
dev->read_only = 0;
if (size)
*size = header->image_size;
@@ -613,8 +597,7 @@ static int rbd_header_set_snap(struct rbd_device *dev, u64 *size)
ret = snap_by_name(header, dev->snap_name, &snapc->seq, size);
if (ret < 0)
goto done;
-
- dev->cur_snap = header->total_snaps - ret;
+ dev->snap_id = snapc->seq;
dev->read_only = 1;
}

@@ -1521,7 +1504,7 @@ static void rbd_rq_fn(struct request_queue *q)
coll, cur_seg);
else
rbd_req_read(rq, rbd_dev,
- cur_snap_id(rbd_dev),
+ rbd_dev->snap_id,
ofs,
op_size, bio,
coll, cur_seg);
@@ -1656,7 +1639,7 @@ static int rbd_header_add_snap(struct rbd_device *dev,
struct ceph_mon_client *monc;

/* we should create a snapshot only if we're pointing at the head */
- if (dev->cur_snap)
+ if (dev->snap_id != CEPH_NOSNAP)
return -EINVAL;

monc = &dev->rbd_client->client->monc;
@@ -1683,7 +1666,9 @@ static int rbd_header_add_snap(struct rbd_device *dev,
if (ret < 0)
return ret;

- dev->header.snapc->seq = new_snapid;
+ down_write(&dev->header_rwsem);
+ dev->header.snapc->seq = new_snapid;
+ up_write(&dev->header_rwsem);

return 0;
bad:
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index 80b331c..5ba5e66 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -427,9 +427,17 @@ static int intel_overlay_off(struct intel_overlay *overlay)
OUT_RING(flip_addr);
OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
/* turn overlay off */
- OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
- OUT_RING(flip_addr);
- OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
+ if (IS_I830(dev)) {
+ /* Workaround: Don't disable the overlay fully, since otherwise
+ * it dies on the next OVERLAY_ON cmd. */
+ OUT_RING(MI_NOOP);
+ OUT_RING(MI_NOOP);
+ OUT_RING(MI_NOOP);
+ } else {
+ OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_OFF);
+ OUT_RING(flip_addr);
+ OUT_RING(MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP);
+ }
ADVANCE_LP_RING();

return intel_overlay_do_wait_request(overlay, request,
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 2d07fbf..f6176bc 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -1421,7 +1421,7 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode)
atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0);
atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
/* some early dce3.2 boards have a bug in their transmitter control table */
- if ((rdev->family != CHIP_RV710) || (rdev->family != CHIP_RV730))
+ if ((rdev->family != CHIP_RV710) && (rdev->family != CHIP_RV730))
atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0);
}
if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) {
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index ebc6fac..578207e 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -749,7 +749,10 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
/* clear the pages coming from the pool if requested */
if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) {
list_for_each_entry(p, &plist, lru) {
- clear_page(page_address(p));
+ if (PageHighMem(p))
+ clear_highpage(p);
+ else
+ clear_page(page_address(p));
}
}

diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 589753f..2b78ddd 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -3079,8 +3079,10 @@ static irqreturn_t sky2_intr(int irq, void *dev_id)

/* Reading this mask interrupts as side effect */
status = sky2_read32(hw, B0_Y2_SP_ISRC2);
- if (status == 0 || status == ~0)
+ if (status == 0 || status == ~0) {
+ sky2_write32(hw, B0_Y2_SP_ICR, 2);
return IRQ_NONE;
+ }

prefetch(&hw->st_le[hw->st_idx]);

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 482dcd3..0dc70c2 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -73,7 +73,7 @@
static const int multicast_filter_limit = 32;

#define MAX_READ_REQUEST_SHIFT 12
-#define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */
+#define TX_DMA_BURST 7 /* Maximum PCI burst, '7' is unlimited */
#define SafeMtu 0x1c20 /* ... actually life sucks beyond ~7k */
#define InterFrameGap 0x03 /* 3 means InterFrameGap = the shortest one */

@@ -3488,6 +3488,8 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
void __iomem *ioaddr = tp->mmio_addr;

switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_25:
+ case RTL_GIGA_MAC_VER_26:
case RTL_GIGA_MAC_VER_29:
case RTL_GIGA_MAC_VER_30:
case RTL_GIGA_MAC_VER_32:
@@ -4129,6 +4131,9 @@ static void rtl_set_rx_mode(struct net_device *dev)
mc_filter[1] = swab32(data);
}

+ if (tp->mac_version == RTL_GIGA_MAC_VER_35)
+ mc_filter[1] = mc_filter[0] = 0xffffffff;
+
RTL_W32(MAR0 + 4, mc_filter[1]);
RTL_W32(MAR0 + 0, mc_filter[0]);

diff --git a/drivers/staging/android/android_alarm.h b/drivers/staging/android/android_alarm.h
index 66b6e3d..6eecbde 100644
--- a/drivers/staging/android/android_alarm.h
+++ b/drivers/staging/android/android_alarm.h
@@ -110,12 +110,10 @@ enum android_alarm_return_flags {
#define ANDROID_ALARM_WAIT _IO('a', 1)

#define ALARM_IOW(c, type, size) _IOW('a', (c) | ((type) << 4), size)
-#define ALARM_IOR(c, type, size) _IOR('a', (c) | ((type) << 4), size)
-
/* Set alarm */
#define ANDROID_ALARM_SET(type) ALARM_IOW(2, type, struct timespec)
#define ANDROID_ALARM_SET_AND_WAIT(type) ALARM_IOW(3, type, struct timespec)
-#define ANDROID_ALARM_GET_TIME(type) ALARM_IOR(4, type, struct timespec)
+#define ANDROID_ALARM_GET_TIME(type) ALARM_IOW(4, type, struct timespec)
#define ANDROID_ALARM_SET_RTC _IOW('a', 5, struct timespec)
#define ANDROID_ALARM_BASE_CMD(cmd) (cmd & ~(_IOC(0, 0, 0xf0, 0)))
#define ANDROID_ALARM_IOCTL_TO_TYPE(cmd) (_IOC_NR(cmd) >> 4)
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 6189923..d00b38e 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -649,19 +649,19 @@ serial_omap_configure_xonxoff

/*
* IXON Flag:
- * Flow control for OMAP.TX
- * OMAP.RX should listen for XON/XOFF
+ * Enable XON/XOFF flow control on output.
+ * Transmit XON1, XOFF1
*/
if (termios->c_iflag & IXON)
- up->efr |= OMAP_UART_SW_RX;
+ up->efr |= OMAP_UART_SW_TX;

/*
* IXOFF Flag:
- * Flow control for OMAP.RX
- * OMAP.TX should send XON/XOFF
+ * Enable XON/XOFF flow control on input.
+ * Receiver compares XON1, XOFF1.
*/
if (termios->c_iflag & IXOFF)
- up->efr |= OMAP_UART_SW_TX;
+ up->efr |= OMAP_UART_SW_RX;

serial_out(up, UART_EFR, up->efr | UART_EFR_ECB);
serial_out(up, UART_LCR, UART_LCR_CONF_MODE_A);
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 17ec21e..43aa36b 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -157,6 +157,7 @@ static void option_instat_callback(struct urb *urb);
#define NOVATELWIRELESS_PRODUCT_EVDO_EMBEDDED_HIGHSPEED 0x8001
#define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_FULLSPEED 0x9000
#define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001
+#define NOVATELWIRELESS_PRODUCT_E362 0x9010
#define NOVATELWIRELESS_PRODUCT_G1 0xA001
#define NOVATELWIRELESS_PRODUCT_G1_M 0xA002
#define NOVATELWIRELESS_PRODUCT_G2 0xA010
@@ -192,6 +193,9 @@ static void option_instat_callback(struct urb *urb);
#define DELL_PRODUCT_5730_MINICARD_TELUS 0x8181
#define DELL_PRODUCT_5730_MINICARD_VZW 0x8182

+#define DELL_PRODUCT_5800_MINICARD_VZW 0x8195 /* Novatel E362 */
+#define DELL_PRODUCT_5800_V2_MINICARD_VZW 0x8196 /* Novatel E362 */
+
#define KYOCERA_VENDOR_ID 0x0c88
#define KYOCERA_PRODUCT_KPC650 0x17da
#define KYOCERA_PRODUCT_KPC680 0x180a
@@ -282,6 +286,7 @@ static void option_instat_callback(struct urb *urb);
/* ALCATEL PRODUCTS */
#define ALCATEL_VENDOR_ID 0x1bbb
#define ALCATEL_PRODUCT_X060S_X200 0x0000
+#define ALCATEL_PRODUCT_X220_X500D 0x0017

#define PIRELLI_VENDOR_ID 0x1266
#define PIRELLI_PRODUCT_C100_1 0x1002
@@ -705,6 +710,7 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_G2) },
/* Novatel Ovation MC551 a.k.a. Verizon USB551L */
{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) },

{ USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) },
{ USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) },
@@ -727,6 +733,8 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_SPRINT) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
{ USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_TELUS) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
{ USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5730_MINICARD_VZW) }, /* Dell Wireless 5730 Mobile Broadband EVDO/HSPA Mini-Card */
+ { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_MINICARD_VZW, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5800_V2_MINICARD_VZW, 0xff, 0xff, 0xff) },
{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */
{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) },
{ USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) },
@@ -1156,6 +1164,7 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X060S_X200),
.driver_info = (kernel_ulong_t)&alcatel_x200_blacklist
},
+ { USB_DEVICE(ALCATEL_VENDOR_ID, ALCATEL_PRODUCT_X220_X500D) },
{ USB_DEVICE(AIRPLUS_VENDOR_ID, AIRPLUS_PRODUCT_MCD650) },
{ USB_DEVICE(TLAYTECH_VENDOR_ID, TLAYTECH_PRODUCT_TEU800) },
{ USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14),
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index bcf2617..c627ba2 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -768,7 +768,7 @@ int usb_serial_probe(struct usb_interface *interface,

if (retval) {
dbg("sub driver rejected device");
- kfree(serial);
+ usb_serial_put(serial);
module_put(type->driver.owner);
return retval;
}
@@ -840,7 +840,7 @@ int usb_serial_probe(struct usb_interface *interface,
*/
if (num_bulk_in == 0 || num_bulk_out == 0) {
dev_info(&interface->dev, "PL-2303 hack: descriptors matched but endpoints did not\n");
- kfree(serial);
+ usb_serial_put(serial);
module_put(type->driver.owner);
return -ENODEV;
}
@@ -854,7 +854,7 @@ int usb_serial_probe(struct usb_interface *interface,
if (num_ports == 0) {
dev_err(&interface->dev,
"Generic device with no bulk out, not allowed.\n");
- kfree(serial);
+ usb_serial_put(serial);
module_put(type->driver.owner);
return -EIO;
}
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 6908e4c..26c47a4 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -1365,8 +1365,8 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);

- exit_idle();
irq_enter();
+ exit_idle();

__xen_evtchn_do_upcall();

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 173b1d2..32ee086 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -54,7 +54,12 @@
(CONGESTION_ON_THRESH(congestion_kb) - \
(CONGESTION_ON_THRESH(congestion_kb) >> 2))

-
+static inline struct ceph_snap_context *page_snap_context(struct page *page)
+{
+ if (PagePrivate(page))
+ return (void *)page->private;
+ return NULL;
+}

/*
* Dirty a page. Optimistically adjust accounting, on the assumption
@@ -142,10 +147,9 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset)
{
struct inode *inode;
struct ceph_inode_info *ci;
- struct ceph_snap_context *snapc = (void *)page->private;
+ struct ceph_snap_context *snapc = page_snap_context(page);

BUG_ON(!PageLocked(page));
- BUG_ON(!page->private);
BUG_ON(!PagePrivate(page));
BUG_ON(!page->mapping);

@@ -182,7 +186,6 @@ static int ceph_releasepage(struct page *page, gfp_t g)
struct inode *inode = page->mapping ? page->mapping->host : NULL;
dout("%p releasepage %p idx %lu\n", inode, page, page->index);
WARN_ON(PageDirty(page));
- WARN_ON(page->private);
WARN_ON(PagePrivate(page));
return 0;
}
@@ -202,7 +205,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
dout("readpage inode %p file %p page %p index %lu\n",
inode, filp, page, page->index);
err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
- page->index << PAGE_CACHE_SHIFT, &len,
+ (u64) page_offset(page), &len,
ci->i_truncate_seq, ci->i_truncate_size,
&page, 1, 0);
if (err == -ENOENT)
@@ -283,7 +286,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
int nr_pages = 0;
int ret;

- off = page->index << PAGE_CACHE_SHIFT;
+ off = (u64) page_offset(page);

/* count pages */
next_index = page->index;
@@ -423,7 +426,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
struct ceph_inode_info *ci;
struct ceph_fs_client *fsc;
struct ceph_osd_client *osdc;
- loff_t page_off = page->index << PAGE_CACHE_SHIFT;
+ loff_t page_off = page_offset(page);
int len = PAGE_CACHE_SIZE;
loff_t i_size;
int err = 0;
@@ -443,7 +446,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
osdc = &fsc->client->osdc;

/* verify this is a writeable snap context */
- snapc = (void *)page->private;
+ snapc = page_snap_context(page);
if (snapc == NULL) {
dout("writepage %p page %p not dirty?\n", inode, page);
goto out;
@@ -451,7 +454,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
oldest = get_oldest_context(inode, &snap_size);
if (snapc->seq > oldest->seq) {
dout("writepage %p page %p snapc %p not writeable - noop\n",
- inode, page, (void *)page->private);
+ inode, page, snapc);
/* we should only noop if called by kswapd */
WARN_ON((current->flags & PF_MEMALLOC) == 0);
ceph_put_snap_context(oldest);
@@ -591,7 +594,7 @@ static void writepages_finish(struct ceph_osd_request *req,
clear_bdi_congested(&fsc->backing_dev_info,
BLK_RW_ASYNC);

- ceph_put_snap_context((void *)page->private);
+ ceph_put_snap_context(page_snap_context(page));
page->private = 0;
ClearPagePrivate(page);
dout("unlocking %d %p\n", i, page);
@@ -795,7 +798,7 @@ get_more_pages:
}

/* only if matching snap context */
- pgsnapc = (void *)page->private;
+ pgsnapc = page_snap_context(page);
if (pgsnapc->seq > snapc->seq) {
dout("page snapc %p %lld > oldest %p %lld\n",
pgsnapc, pgsnapc->seq, snapc, snapc->seq);
@@ -814,8 +817,7 @@ get_more_pages:
/* ok */
if (locked_pages == 0) {
/* prepare async write request */
- offset = (unsigned long long)page->index
- << PAGE_CACHE_SHIFT;
+ offset = (u64) page_offset(page);
len = wsize;
req = ceph_osdc_new_request(&fsc->client->osdc,
&ci->i_layout,
@@ -984,7 +986,7 @@ retry_locked:
BUG_ON(!ci->i_snap_realm);
down_read(&mdsc->snap_rwsem);
BUG_ON(!ci->i_snap_realm->cached_context);
- snapc = (void *)page->private;
+ snapc = page_snap_context(page);
if (snapc && snapc != ci->i_head_snapc) {
/*
* this page is already dirty in another (older) snap
@@ -1177,7 +1179,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct inode *inode = vma->vm_file->f_dentry->d_inode;
struct page *page = vmf->page;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
- loff_t off = page->index << PAGE_CACHE_SHIFT;
+ loff_t off = page_offset(page);
loff_t size, len;
int ret;

diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index fb962ef..6d59006 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -201,6 +201,7 @@ int ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
int err = -ENOMEM;

dout("ceph_fs_debugfs_init\n");
+ BUG_ON(!fsc->client->debugfs_dir);
fsc->debugfs_congestion_kb =
debugfs_create_file("writeback_congestion_kb",
0600,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 89971e1..7f1682d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -334,10 +334,10 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
dout("mdsc put_session %p %d -> %d\n", s,
atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
if (atomic_dec_and_test(&s->s_ref)) {
- if (s->s_authorizer)
+ if (s->s_auth.authorizer)
s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer(
s->s_mdsc->fsc->client->monc.auth,
- s->s_authorizer);
+ s->s_auth.authorizer);
kfree(s);
}
}
@@ -394,11 +394,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
s->s_seq = 0;
mutex_init(&s->s_mutex);

- ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
- s->s_con.private = s;
- s->s_con.ops = &mds_con_ops;
- s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
- s->s_con.peer_name.num = cpu_to_le64(mds);
+ ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);

spin_lock_init(&s->s_gen_ttl_lock);
s->s_cap_gen = 0;
@@ -440,7 +436,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
mdsc->sessions[mds] = s;
atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */

- ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
+ ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
+ ceph_mdsmap_get_addr(mdsc->mdsmap, mds));

return s;

@@ -2532,6 +2529,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
session->s_seq = 0;

ceph_con_open(&session->s_con,
+ CEPH_ENTITY_TYPE_MDS, mds,
ceph_mdsmap_get_addr(mdsc->mdsmap, mds));

/* replay unsafe requests */
@@ -2636,7 +2634,8 @@ static void check_new_map(struct ceph_mds_client *mdsc,
ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
session_state_name(s->s_state));

- if (memcmp(ceph_mdsmap_get_addr(oldmap, i),
+ if (i >= newmap->m_max_mds ||
+ memcmp(ceph_mdsmap_get_addr(oldmap, i),
ceph_mdsmap_get_addr(newmap, i),
sizeof(struct ceph_entity_addr))) {
if (s->s_state == CEPH_MDS_SESSION_OPENING) {
@@ -3395,39 +3394,33 @@ out:
/*
* authentication
*/
-static int get_authorizer(struct ceph_connection *con,
- void **buf, int *len, int *proto,
- void **reply_buf, int *reply_len, int force_new)
+
+/*
+ * Note: returned pointer is the address of a structure that's
+ * managed separately. Caller must *not* attempt to free it.
+ */
+static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
+ int *proto, int force_new)
{
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
- int ret = 0;
-
- if (force_new && s->s_authorizer) {
- ac->ops->destroy_authorizer(ac, s->s_authorizer);
- s->s_authorizer = NULL;
- }
- if (s->s_authorizer == NULL) {
- if (ac->ops->create_authorizer) {
- ret = ac->ops->create_authorizer(
- ac, CEPH_ENTITY_TYPE_MDS,
- &s->s_authorizer,
- &s->s_authorizer_buf,
- &s->s_authorizer_buf_len,
- &s->s_authorizer_reply_buf,
- &s->s_authorizer_reply_buf_len);
- if (ret)
- return ret;
- }
- }
+ struct ceph_auth_handshake *auth = &s->s_auth;

+ if (force_new && auth->authorizer) {
+ if (ac->ops && ac->ops->destroy_authorizer)
+ ac->ops->destroy_authorizer(ac, auth->authorizer);
+ auth->authorizer = NULL;
+ }
+ if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
+ int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
+ auth);
+ if (ret)
+ return ERR_PTR(ret);
+ }
*proto = ac->protocol;
- *buf = s->s_authorizer_buf;
- *len = s->s_authorizer_buf_len;
- *reply_buf = s->s_authorizer_reply_buf;
- *reply_len = s->s_authorizer_reply_buf_len;
- return 0;
+
+ return auth;
}


@@ -3437,7 +3430,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;

- return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len);
+ return ac->ops->verify_authorizer_reply(ac, s->s_auth.authorizer, len);
}

static int invalidate_authorizer(struct ceph_connection *con)
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 8c7c04e..dd26846 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -11,6 +11,7 @@
#include <linux/ceph/types.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/mdsmap.h>
+#include <linux/ceph/auth.h>

/*
* Some lock dependencies:
@@ -113,9 +114,7 @@ struct ceph_mds_session {

struct ceph_connection s_con;

- struct ceph_authorizer *s_authorizer;
- void *s_authorizer_buf, *s_authorizer_reply_buf;
- size_t s_authorizer_buf_len, s_authorizer_reply_buf_len;
+ struct ceph_auth_handshake s_auth;

/* protected by s_gen_ttl_lock */
spinlock_t s_gen_ttl_lock;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 3cc1b25..6ccf176 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -225,6 +225,13 @@ sid_to_str(struct cifs_sid *sidptr, char *sidstr)
}

static void
+cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
+{
+ memcpy(dst, src, sizeof(*dst));
+ dst->num_subauth = min_t(u8, src->num_subauth, NUM_SUBAUTHS);
+}
+
+static void
id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
struct cifs_sid_id **psidid, char *typestr)
{
@@ -248,7 +255,7 @@ id_rb_insert(struct rb_root *root, struct cifs_sid *sidptr,
}
}

- memcpy(&(*psidid)->sid, sidptr, sizeof(struct cifs_sid));
+ cifs_copy_sid(&(*psidid)->sid, sidptr);
(*psidid)->time = jiffies - (SID_MAP_RETRY + 1);
(*psidid)->refcount = 0;

@@ -354,7 +361,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
* any fields of the node after a reference is put .
*/
if (test_bit(SID_ID_MAPPED, &psidid->state)) {
- memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+ cifs_copy_sid(ssid, &psidid->sid);
psidid->time = jiffies; /* update ts for accessing */
goto id_sid_out;
}
@@ -370,14 +377,14 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
if (IS_ERR(sidkey)) {
rc = -EINVAL;
cFYI(1, "%s: Can't map and id to a SID", __func__);
+ } else if (sidkey->datalen < sizeof(struct cifs_sid)) {
+ rc = -EIO;
+ cFYI(1, "%s: Downcall contained malformed key "
+ "(datalen=%hu)", __func__, sidkey->datalen);
} else {
lsid = (struct cifs_sid *)sidkey->payload.data;
- memcpy(&psidid->sid, lsid,
- sidkey->datalen < sizeof(struct cifs_sid) ?
- sidkey->datalen : sizeof(struct cifs_sid));
- memcpy(ssid, &psidid->sid,
- sidkey->datalen < sizeof(struct cifs_sid) ?
- sidkey->datalen : sizeof(struct cifs_sid));
+ cifs_copy_sid(&psidid->sid, lsid);
+ cifs_copy_sid(ssid, &psidid->sid);
set_bit(SID_ID_MAPPED, &psidid->state);
key_put(sidkey);
kfree(psidid->sidstr);
@@ -396,7 +403,7 @@ id_to_sid(unsigned long cid, uint sidtype, struct cifs_sid *ssid)
return rc;
}
if (test_bit(SID_ID_MAPPED, &psidid->state))
- memcpy(ssid, &psidid->sid, sizeof(struct cifs_sid));
+ cifs_copy_sid(ssid, &psidid->sid);
else
rc = -EINVAL;
}
@@ -675,8 +682,6 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
static void copy_sec_desc(const struct cifs_ntsd *pntsd,
struct cifs_ntsd *pnntsd, __u32 sidsoffset)
{
- int i;
-
struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;

@@ -692,26 +697,14 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
le32_to_cpu(pntsd->osidoffset));
nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
-
- nowner_sid_ptr->revision = owner_sid_ptr->revision;
- nowner_sid_ptr->num_subauth = owner_sid_ptr->num_subauth;
- for (i = 0; i < 6; i++)
- nowner_sid_ptr->authority[i] = owner_sid_ptr->authority[i];
- for (i = 0; i < 5; i++)
- nowner_sid_ptr->sub_auth[i] = owner_sid_ptr->sub_auth[i];
+ cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);

/* copy group sid */
group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
le32_to_cpu(pntsd->gsidoffset));
ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
sizeof(struct cifs_sid));
-
- ngroup_sid_ptr->revision = group_sid_ptr->revision;
- ngroup_sid_ptr->num_subauth = group_sid_ptr->num_subauth;
- for (i = 0; i < 6; i++)
- ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
- for (i = 0; i < 5; i++)
- ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
+ cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);

return;
}
@@ -1120,8 +1113,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
kfree(nowner_sid_ptr);
return rc;
}
- memcpy(owner_sid_ptr, nowner_sid_ptr,
- sizeof(struct cifs_sid));
+ cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
kfree(nowner_sid_ptr);
*aclflag = CIFS_ACL_OWNER;
}
@@ -1139,8 +1131,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
kfree(ngroup_sid_ptr);
return rc;
}
- memcpy(group_sid_ptr, ngroup_sid_ptr,
- sizeof(struct cifs_sid));
+ cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
kfree(ngroup_sid_ptr);
*aclflag = CIFS_ACL_GROUP;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 0f04d2e..240832e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -280,6 +280,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
char *fnek_src;
char *cipher_key_bytes_src;
char *fn_cipher_key_bytes_src;
+ u8 cipher_code;

*check_ruid = 0;

@@ -421,6 +422,18 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
&& !fn_cipher_key_bytes_set)
mount_crypt_stat->global_default_fn_cipher_key_bytes =
mount_crypt_stat->global_default_cipher_key_size;
+
+ cipher_code = ecryptfs_code_for_cipher_string(
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_key_size);
+ if (!cipher_code) {
+ ecryptfs_printk(KERN_ERR,
+ "eCryptfs doesn't support cipher: %s",
+ mount_crypt_stat->global_default_cipher_name);
+ rc = -EINVAL;
+ goto out;
+ }
+
mutex_lock(&key_tfm_list_mutex);
if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name,
NULL)) {
@@ -506,7 +519,6 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
goto out;
}

- s->s_flags = flags;
rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY);
if (rc)
goto out1;
@@ -542,6 +554,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
}

ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
+
+ /*
+ * Set the POSIX ACL flag based on whether they're enabled in the lower
+ * mount. Force a read-only eCryptfs mount if the lower mount is ro.
+ * Allow a ro eCryptfs mount even when the lower mount is rw.
+ */
+ s->s_flags = flags & ~MS_POSIXACL;
+ s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
s->s_magic = ECRYPTFS_SUPER_MAGIC;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5e80180..8955e36 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -307,8 +307,7 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
dprintk("%s ERROR: %d Reset session\n", __func__,
errorcode);
nfs4_schedule_session_recovery(clp->cl_session);
- exception->retry = 1;
- break;
+ goto wait_on_recovery;
#endif /* defined(CONFIG_NFS_V4_1) */
case -NFS4ERR_FILE_OPEN:
if (exception->timeout > HZ) {
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index f35794b..a506360 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -21,6 +21,7 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
if ((old->path.mnt == new->path.mnt) &&
(old->path.dentry == new->path.dentry))
return true;
+ break;
case (FSNOTIFY_EVENT_NONE):
return true;
default:
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f99c1b4..c11db51 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1788,8 +1788,9 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,

BUG_ON(!th->t_trans_id);

- dquot_initialize(inode);
+ reiserfs_write_unlock(inode->i_sb);
err = dquot_alloc_inode(inode);
+ reiserfs_write_lock(inode->i_sb);
if (err)
goto out_end_trans;
if (!dir->i_nlink) {
@@ -1985,8 +1986,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,

out_end_trans:
journal_end(th, th->t_super, th->t_blocks_allocated);
+ reiserfs_write_unlock(inode->i_sb);
/* Drop can be outside and it needs more credits so it's better to have it outside */
dquot_drop(inode);
+ reiserfs_write_lock(inode->i_sb);
inode->i_flags |= S_NOQUOTA;
make_bad_inode(inode);

@@ -3109,10 +3112,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
/* must be turned off for recursive notify_change calls */
ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);

- depth = reiserfs_write_lock_once(inode->i_sb);
if (is_quota_modification(inode, attr))
dquot_initialize(inode);
-
+ depth = reiserfs_write_lock_once(inode->i_sb);
if (attr->ia_valid & ATTR_SIZE) {
/* version 2 items will be caught by the s_maxbytes check
** done for us in vmtruncate
@@ -3176,7 +3178,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
error = journal_begin(&th, inode->i_sb, jbegin_count);
if (error)
goto out;
+ reiserfs_write_unlock_once(inode->i_sb, depth);
error = dquot_transfer(inode, attr);
+ depth = reiserfs_write_lock_once(inode->i_sb);
if (error) {
journal_end(&th, inode->i_sb, jbegin_count);
goto out;
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index f8afa4b..2f40a4c 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1968,7 +1968,9 @@ int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct tree
key2type(&(key->on_disk_key)));
#endif

+ reiserfs_write_unlock(inode->i_sb);
retval = dquot_alloc_space_nodirty(inode, pasted_size);
+ reiserfs_write_lock(inode->i_sb);
if (retval) {
pathrelse(search_path);
return retval;
@@ -2061,9 +2063,11 @@ int reiserfs_insert_item(struct reiserfs_transaction_handle *th,
"reiserquota insert_item(): allocating %u id=%u type=%c",
quota_bytes, inode->i_uid, head2type(ih));
#endif
+ reiserfs_write_unlock(inode->i_sb);
/* We can't dirty inode here. It would be immediately written but
* appropriate stat item isn't inserted yet... */
retval = dquot_alloc_space_nodirty(inode, quota_bytes);
+ reiserfs_write_lock(inode->i_sb);
if (retval) {
pathrelse(path);
return retval;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 8b7616e..8169be9 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -256,7 +256,9 @@ static int finish_unfinished(struct super_block *s)
retval = remove_save_link_only(s, &save_link_key, 0);
continue;
}
+ reiserfs_write_unlock(s);
dquot_initialize(inode);
+ reiserfs_write_lock(s);

if (truncate && S_ISDIR(inode->i_mode)) {
/* We got a truncate request for a dir which is impossible.
@@ -1292,7 +1294,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
kfree(qf_names[i]);
#endif
err = -EINVAL;
- goto out_err;
+ goto out_unlock;
}
#ifdef CONFIG_QUOTA
handle_quota_files(s, qf_names, &qfmt);
@@ -1336,7 +1338,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
if (blocks) {
err = reiserfs_resize(s, blocks);
if (err != 0)
- goto out_err;
+ goto out_unlock;
}

if (*mount_flags & MS_RDONLY) {
@@ -1346,9 +1348,15 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
/* it is read-only already */
goto out_ok;

+ /*
+ * Drop write lock. Quota will retake it when needed and lock
+ * ordering requires calling dquot_suspend() without it.
+ */
+ reiserfs_write_unlock(s);
err = dquot_suspend(s, -1);
if (err < 0)
goto out_err;
+ reiserfs_write_lock(s);

/* try to remount file system with read-only permissions */
if (sb_umount_state(rs) == REISERFS_VALID_FS
@@ -1358,7 +1366,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)

err = journal_begin(&th, s, 10);
if (err)
- goto out_err;
+ goto out_unlock;

/* Mounting a rw partition read-only. */
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1373,7 +1381,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)

if (reiserfs_is_journal_aborted(journal)) {
err = journal->j_errno;
- goto out_err;
+ goto out_unlock;
}

handle_data_mode(s, mount_options);
@@ -1382,7 +1390,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
s->s_flags &= ~MS_RDONLY; /* now it is safe to call journal_begin */
err = journal_begin(&th, s, 10);
if (err)
- goto out_err;
+ goto out_unlock;

/* Mount a partition which is read-only, read-write */
reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
@@ -1399,11 +1407,17 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
SB_JOURNAL(s)->j_must_wait = 1;
err = journal_end(&th, s, 10);
if (err)
- goto out_err;
+ goto out_unlock;
s->s_dirt = 0;

if (!(*mount_flags & MS_RDONLY)) {
+ /*
+ * Drop write lock. Quota will retake it when needed and lock
+ * ordering requires calling dquot_resume() without it.
+ */
+ reiserfs_write_unlock(s);
dquot_resume(s, -1);
+ reiserfs_write_lock(s);
finish_unfinished(s);
reiserfs_xattr_init(s, *mount_flags);
}
@@ -1413,9 +1427,10 @@ out_ok:
reiserfs_write_unlock(s);
return 0;

+out_unlock:
+ reiserfs_write_unlock(s);
out_err:
kfree(new_opts);
- reiserfs_write_unlock(s);
return err;
}

@@ -2049,13 +2064,15 @@ static int reiserfs_write_dquot(struct dquot *dquot)
REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
if (ret)
goto out;
+ reiserfs_write_unlock(dquot->dq_sb);
ret = dquot_commit(dquot);
+ reiserfs_write_lock(dquot->dq_sb);
err =
journal_end(&th, dquot->dq_sb,
REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
if (!ret && err)
ret = err;
- out:
+out:
reiserfs_write_unlock(dquot->dq_sb);
return ret;
}
@@ -2071,13 +2088,15 @@ static int reiserfs_acquire_dquot(struct dquot *dquot)
REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
if (ret)
goto out;
+ reiserfs_write_unlock(dquot->dq_sb);
ret = dquot_acquire(dquot);
+ reiserfs_write_lock(dquot->dq_sb);
err =
journal_end(&th, dquot->dq_sb,
REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb));
if (!ret && err)
ret = err;
- out:
+out:
reiserfs_write_unlock(dquot->dq_sb);
return ret;
}
@@ -2091,19 +2110,21 @@ static int reiserfs_release_dquot(struct dquot *dquot)
ret =
journal_begin(&th, dquot->dq_sb,
REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
+ reiserfs_write_unlock(dquot->dq_sb);
if (ret) {
/* Release dquot anyway to avoid endless cycle in dqput() */
dquot_release(dquot);
goto out;
}
ret = dquot_release(dquot);
+ reiserfs_write_lock(dquot->dq_sb);
err =
journal_end(&th, dquot->dq_sb,
REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb));
if (!ret && err)
ret = err;
- out:
reiserfs_write_unlock(dquot->dq_sb);
+out:
return ret;
}

@@ -2128,11 +2149,13 @@ static int reiserfs_write_info(struct super_block *sb, int type)
ret = journal_begin(&th, sb, 2);
if (ret)
goto out;
+ reiserfs_write_unlock(sb);
ret = dquot_commit_info(sb, type);
+ reiserfs_write_lock(sb);
err = journal_end(&th, sb, 2);
if (!ret && err)
ret = err;
- out:
+out:
reiserfs_write_unlock(sb);
return ret;
}
@@ -2157,8 +2180,11 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
struct reiserfs_transaction_handle th;
int opt = type == USRQUOTA ? REISERFS_USRQUOTA : REISERFS_GRPQUOTA;

- if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt)))
- return -EINVAL;
+ reiserfs_write_lock(sb);
+ if (!(REISERFS_SB(sb)->s_mount_opt & (1 << opt))) {
+ err = -EINVAL;
+ goto out;
+ }

/* Quotafile not on the same filesystem? */
if (path->dentry->d_sb != sb) {
@@ -2200,8 +2226,10 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
if (err)
goto out;
}
- err = dquot_quota_on(sb, type, format_id, path);
+ reiserfs_write_unlock(sb);
+ return dquot_quota_on(sb, type, format_id, path);
out:
+ reiserfs_write_unlock(sb);
return err;
}

@@ -2275,7 +2303,9 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
tocopy = sb->s_blocksize - offset < towrite ?
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
+ reiserfs_write_lock(sb);
err = reiserfs_get_block(inode, blk, &tmp_bh, GET_BLOCK_CREATE);
+ reiserfs_write_unlock(sb);
if (err)
goto out;
if (offset || tocopy != sb->s_blocksize)
@@ -2291,10 +2321,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type,
flush_dcache_page(bh->b_page);
set_buffer_uptodate(bh);
unlock_buffer(bh);
+ reiserfs_write_lock(sb);
reiserfs_prepare_for_journal(sb, bh, 1);
journal_mark_dirty(current->journal_info, sb, bh);
if (!journal_quota)
reiserfs_add_ordered_list(inode, bh);
+ reiserfs_write_unlock(sb);
brelse(bh);
offset = 0;
towrite -= tocopy;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 2559d17..5dc48ca 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -681,8 +681,16 @@ int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
if (!lprops) {
lprops = ubifs_fast_find_freeable(c);
if (!lprops) {
- ubifs_assert(c->freeable_cnt == 0);
- if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+ /*
+ * The first condition means the following: go scan the
+ * LPT if there are uncategorized lprops, which means
+ * there may be freeable LEBs there (UBIFS does not
+ * store the information about freeable LEBs in the
+ * master node).
+ */
+ if (c->in_a_category_cnt != c->main_lebs ||
+ c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
+ ubifs_assert(c->freeable_cnt == 0);
lprops = scan_for_leb_for_idx(c);
if (IS_ERR(lprops)) {
err = PTR_ERR(lprops);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index f8a181e..ea9d491 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -300,8 +300,11 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
default:
ubifs_assert(0);
}
+
lprops->flags &= ~LPROPS_CAT_MASK;
lprops->flags |= cat;
+ c->in_a_category_cnt += 1;
+ ubifs_assert(c->in_a_category_cnt <= c->main_lebs);
}

/**
@@ -334,6 +337,9 @@ static void ubifs_remove_from_cat(struct ubifs_info *c,
default:
ubifs_assert(0);
}
+
+ c->in_a_category_cnt -= 1;
+ ubifs_assert(c->in_a_category_cnt >= 0);
}

/**
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 93d59ac..4971cb2 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1184,6 +1184,8 @@ struct ubifs_debug_info;
* @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
* @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
* @freeable_cnt: number of freeable LEBs in @freeable_list
+ * @in_a_category_cnt: count of lprops which are in a certain category, which
+ * basically means that they were loaded from the flash
*
* @ltab_lnum: LEB number of LPT's own lprops table
* @ltab_offs: offset of LPT's own lprops table
@@ -1413,6 +1415,7 @@ struct ubifs_info {
struct list_head freeable_list;
struct list_head frdi_idx_list;
int freeable_cnt;
+ int in_a_category_cnt;

int ltab_lnum;
int ltab_offs;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 6819b51..bb76128 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1165,9 +1165,14 @@ xfs_buf_bio_end_io(
{
xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;

- xfs_buf_ioerror(bp, -error);
+ /*
+ * don't overwrite existing errors - otherwise we can lose errors on
+ * buffers that require multiple bios to complete.
+ */
+ if (!bp->b_error)
+ xfs_buf_ioerror(bp, -error);

- if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
+ if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

_xfs_buf_ioend(bp, 1);
@@ -1243,6 +1248,11 @@ next_chunk:
if (size)
goto next_chunk;
} else {
+ /*
+ * This is guaranteed not to be the last io reference count
+ * because the caller (xfs_buf_iorequest) holds a count itself.
+ */
+ atomic_dec(&bp->b_io_remaining);
xfs_buf_ioerror(bp, EIO);
bio_put(bio);
}
diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h
index aa13392..d4080f3 100644
--- a/include/linux/ceph/auth.h
+++ b/include/linux/ceph/auth.h
@@ -14,6 +14,14 @@
struct ceph_auth_client;
struct ceph_authorizer;

+struct ceph_auth_handshake {
+ struct ceph_authorizer *authorizer;
+ void *authorizer_buf;
+ size_t authorizer_buf_len;
+ void *authorizer_reply_buf;
+ size_t authorizer_reply_buf_len;
+};
+
struct ceph_auth_client_ops {
const char *name;

@@ -43,9 +51,7 @@ struct ceph_auth_client_ops {
* the response to authenticate the service.
*/
int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
- struct ceph_authorizer **a,
- void **buf, size_t *len,
- void **reply_buf, size_t *reply_len);
+ struct ceph_auth_handshake *auth);
int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
struct ceph_authorizer *a, size_t len);
void (*destroy_authorizer)(struct ceph_auth_client *ac,
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index e71d683..98ec36a 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -132,7 +132,7 @@ struct ceph_client {
u32 supported_features;
u32 required_features;

- struct ceph_messenger *msgr; /* messenger instance */
+ struct ceph_messenger msgr; /* messenger instance */
struct ceph_mon_client monc;
struct ceph_osd_client osdc;

diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 3bff047..189ae06 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -25,15 +25,12 @@ struct ceph_connection_operations {
void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m);

/* authorize an outgoing connection */
- int (*get_authorizer) (struct ceph_connection *con,
- void **buf, int *len, int *proto,
- void **reply_buf, int *reply_len, int force_new);
+ struct ceph_auth_handshake *(*get_authorizer) (
+ struct ceph_connection *con,
+ int *proto, int force_new);
int (*verify_authorizer_reply) (struct ceph_connection *con, int len);
int (*invalidate_authorizer)(struct ceph_connection *con);

- /* protocol version mismatch */
- void (*bad_proto) (struct ceph_connection *con);
-
/* there was some error on the socket (disconnect, whatever) */
void (*fault) (struct ceph_connection *con);

@@ -53,6 +50,7 @@ struct ceph_messenger {
struct ceph_entity_inst inst; /* my name+address */
struct ceph_entity_addr my_enc_addr;

+ atomic_t stopping;
bool nocrc;

/*
@@ -80,7 +78,10 @@ struct ceph_msg {
unsigned nr_pages; /* size of page array */
unsigned page_alignment; /* io offset in first page */
struct ceph_pagelist *pagelist; /* instead of pages */
+
+ struct ceph_connection *con;
struct list_head list_head;
+
struct kref kref;
struct bio *bio; /* instead of pages/pagelist */
struct bio *bio_iter; /* bio iterator */
@@ -106,23 +107,6 @@ struct ceph_msg_pos {
#define MAX_DELAY_INTERVAL (5 * 60 * HZ)

/*
- * ceph_connection state bit flags
- */
-#define LOSSYTX 0 /* we can close channel or drop messages on errors */
-#define CONNECTING 1
-#define NEGOTIATING 2
-#define KEEPALIVE_PENDING 3
-#define WRITE_PENDING 4 /* we have data ready to send */
-#define STANDBY 8 /* no outgoing messages, socket closed. we keep
- * the ceph_connection around to maintain shared
- * state with the peer. */
-#define CLOSED 10 /* we've closed the connection */
-#define SOCK_CLOSED 11 /* socket state changed to closed */
-#define OPENING 13 /* open connection w/ (possibly new) peer */
-#define DEAD 14 /* dead, about to kfree */
-#define BACKOFF 15
-
-/*
* A single connection with another host.
*
* We maintain a queue of outgoing messages, and some session state to
@@ -131,18 +115,22 @@ struct ceph_msg_pos {
*/
struct ceph_connection {
void *private;
- atomic_t nref;

const struct ceph_connection_operations *ops;

struct ceph_messenger *msgr;
+
+ atomic_t sock_state;
struct socket *sock;
- unsigned long state; /* connection state (see flags above) */
+ struct ceph_entity_addr peer_addr; /* peer address */
+ struct ceph_entity_addr peer_addr_for_me;
+
+ unsigned long flags;
+ unsigned long state;
const char *error_msg; /* error message, if any */

- struct ceph_entity_addr peer_addr; /* peer address */
struct ceph_entity_name peer_name; /* peer name */
- struct ceph_entity_addr peer_addr_for_me;
+
unsigned peer_features;
u32 connect_seq; /* identify the most recent connection
attempt for this connection, client */
@@ -163,16 +151,8 @@ struct ceph_connection {

/* connection negotiation temps */
char in_banner[CEPH_BANNER_MAX_LEN];
- union {
- struct { /* outgoing connection */
- struct ceph_msg_connect out_connect;
- struct ceph_msg_connect_reply in_reply;
- };
- struct { /* incoming */
- struct ceph_msg_connect in_connect;
- struct ceph_msg_connect_reply out_reply;
- };
- };
+ struct ceph_msg_connect out_connect;
+ struct ceph_msg_connect_reply in_reply;
struct ceph_entity_addr actual_peer_addr;

/* message out temps */
@@ -215,24 +195,26 @@ extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void);
extern void ceph_msgr_flush(void);

-extern struct ceph_messenger *ceph_messenger_create(
- struct ceph_entity_addr *myaddr,
- u32 features, u32 required);
-extern void ceph_messenger_destroy(struct ceph_messenger *);
+extern void ceph_messenger_init(struct ceph_messenger *msgr,
+ struct ceph_entity_addr *myaddr,
+ u32 supported_features,
+ u32 required_features,
+ bool nocrc);

-extern void ceph_con_init(struct ceph_messenger *msgr,
- struct ceph_connection *con);
+extern void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+ struct ceph_messenger *msgr);
extern void ceph_con_open(struct ceph_connection *con,
+ __u8 entity_type, __u64 entity_num,
struct ceph_entity_addr *addr);
extern bool ceph_con_opened(struct ceph_connection *con);
extern void ceph_con_close(struct ceph_connection *con);
extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg);
-extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg);
-extern void ceph_con_revoke_message(struct ceph_connection *con,
- struct ceph_msg *msg);
+
+extern void ceph_msg_revoke(struct ceph_msg *msg);
+extern void ceph_msg_revoke_incoming(struct ceph_msg *msg);
+
extern void ceph_con_keepalive(struct ceph_connection *con);
-extern struct ceph_connection *ceph_con_get(struct ceph_connection *con);
-extern void ceph_con_put(struct ceph_connection *con);

extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
bool can_fail);
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index 545f859..2113e38 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -70,7 +70,7 @@ struct ceph_mon_client {
bool hunting;
int cur_mon; /* last monitor i contacted */
unsigned long sub_sent, sub_renew_after;
- struct ceph_connection *con;
+ struct ceph_connection con;
bool have_fsid;

/* pending generic requests */
diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
index a362605..09fa96b 100644
--- a/include/linux/ceph/msgpool.h
+++ b/include/linux/ceph/msgpool.h
@@ -11,10 +11,11 @@
struct ceph_msgpool {
const char *name;
mempool_t *pool;
+ int type; /* preallocated message type */
int front_len; /* preallocated payload size */
};

-extern int ceph_msgpool_init(struct ceph_msgpool *pool,
+extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
int front_len, int size, bool blocking,
const char *name);
extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 7c05ac2..d9b880e 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -6,9 +6,10 @@
#include <linux/mempool.h>
#include <linux/rbtree.h>

-#include "types.h"
-#include "osdmap.h"
-#include "messenger.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/osdmap.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/auth.h>

/*
* Maximum object name size
@@ -40,9 +41,7 @@ struct ceph_osd {
struct list_head o_requests;
struct list_head o_linger_requests;
struct list_head o_osd_lru;
- struct ceph_authorizer *o_authorizer;
- void *o_authorizer_buf, *o_authorizer_reply_buf;
- size_t o_authorizer_buf_len, o_authorizer_reply_buf_len;
+ struct ceph_auth_handshake o_auth;
unsigned long lru_ttl;
int o_marked_for_keepalive;
struct list_head o_keepalive_item;
@@ -208,7 +207,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
struct ceph_msg *msg);

-extern void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
+extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
struct ceph_file_layout *layout,
u64 snapid,
u64 off, u64 *plen, u64 *bno,
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index ba4c205..11db454 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -111,9 +111,9 @@ extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);

/* calculate mapping of a file extent to an object */
-extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
- u64 off, u64 *plen,
- u64 *bno, u64 *oxoff, u64 *oxlen);
+extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
+ u64 off, u64 *plen,
+ u64 *bno, u64 *oxoff, u64 *oxlen);

/* calculate mapping of object to a placement group */
extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 97e435b..e7a8c90 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -168,7 +168,7 @@ struct crush_map {


/* crush.c */
-extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos);
+extern int crush_get_bucket_item_weight(const struct crush_bucket *b, int pos);
extern void crush_calc_parents(struct crush_map *map);
extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b);
extern void crush_destroy_bucket_list(struct crush_bucket_list *b);
@@ -177,4 +177,9 @@ extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b);
extern void crush_destroy_bucket(struct crush_bucket *b);
extern void crush_destroy(struct crush_map *map);

+static inline int crush_calc_tree_node(int i)
+{
+ return ((i+1) << 1)-1;
+}
+
#endif
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index c46b99c..9322ab8 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -10,11 +10,11 @@

#include "crush.h"

-extern int crush_find_rule(struct crush_map *map, int pool, int type, int size);
-extern int crush_do_rule(struct crush_map *map,
+extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
+extern int crush_do_rule(const struct crush_map *map,
int ruleno,
int x, int *result, int result_max,
int forcefeed, /* -1 for none */
- __u32 *weights);
+ const __u32 *weights);

#endif
diff --git a/kernel/module.c b/kernel/module.c
index 61ea75e..8597217 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2273,12 +2273,17 @@ static void layout_symtab(struct module *mod, struct load_info *info)
src = (void *)info->hdr + symsect->sh_offset;
nsrc = symsect->sh_size / sizeof(*src);

+ /* strtab always starts with a nul, so offset 0 is the empty string. */
+ strtab_size = 1;
+
/* Compute total space required for the core symbols' strtab. */
- for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src)
- if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) {
- strtab_size += strlen(&info->strtab[src->st_name]) + 1;
+ for (ndst = i = 0; i < nsrc; i++) {
+ if (i == 0 ||
+ is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
+ strtab_size += strlen(&info->strtab[src[i].st_name])+1;
ndst++;
}
+ }

/* Append room for core symbols at end of core part. */
info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1);
@@ -2312,15 +2317,15 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
mod->core_symtab = dst = mod->module_core + info->symoffs;
mod->core_strtab = s = mod->module_core + info->stroffs;
src = mod->symtab;
- *dst = *src;
*s++ = 0;
- for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) {
- if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum))
- continue;
-
- dst[ndst] = *src;
- dst[ndst++].st_name = s - mod->core_strtab;
- s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1;
+ for (ndst = i = 0; i < mod->num_symtab; i++) {
+ if (i == 0 ||
+ is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
+ dst[ndst] = src[i];
+ dst[ndst++].st_name = s - mod->core_strtab;
+ s += strlcpy(s, &mod->strtab[src[i].st_name],
+ KSYM_NAME_LEN) + 1;
+ }
}
mod->core_num_syms = ndst;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7685d4a..81c275b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1489,17 +1489,26 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
{
u64 limit;
- u64 memsw;

limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
- limit += total_swap_pages << PAGE_SHIFT;

- memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
/*
- * If memsw is finite and limits the amount of swap space available
- * to this memcg, return that limit.
+ * Do not consider swap space if we cannot swap due to swappiness
*/
- return min(limit, memsw);
+ if (mem_cgroup_swappiness(memcg)) {
+ u64 memsw;
+
+ limit += total_swap_pages << PAGE_SHIFT;
+ memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+
+ /*
+ * If memsw is finite and limits the amount of swap space
+ * available to this memcg, return that limit.
+ */
+ limit = min(limit, memsw);
+ }
+
+ return limit;
}

static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
diff --git a/mm/shmem.c b/mm/shmem.c
index 40383cd..a859b06 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -595,7 +595,7 @@ static void shmem_evict_inode(struct inode *inode)
kfree(xattr->name);
kfree(xattr);
}
- BUG_ON(inode->i_blocks);
+ WARN_ON(inode->i_blocks);
shmem_free_inode(inode->i_sb);
end_writeback(inode);
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e989ee2..e6ca505 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3128,6 +3128,8 @@ static int kswapd(void *p)
&balanced_classzone_idx);
}
}
+
+ current->reclaim_state = NULL;
return 0;
}

diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 214c2bb..925ca58 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -59,9 +59,7 @@ static int handle_reply(struct ceph_auth_client *ac, int result,
*/
static int ceph_auth_none_create_authorizer(
struct ceph_auth_client *ac, int peer_type,
- struct ceph_authorizer **a,
- void **buf, size_t *len,
- void **reply_buf, size_t *reply_len)
+ struct ceph_auth_handshake *auth)
{
struct ceph_auth_none_info *ai = ac->private;
struct ceph_none_authorizer *au = &ai->au;
@@ -82,11 +80,12 @@ static int ceph_auth_none_create_authorizer(
dout("built authorizer len %d\n", au->buf_len);
}

- *a = (struct ceph_authorizer *)au;
- *buf = au->buf;
- *len = au->buf_len;
- *reply_buf = au->reply_buf;
- *reply_len = sizeof(au->reply_buf);
+ auth->authorizer = (struct ceph_authorizer *) au;
+ auth->authorizer_buf = au->buf;
+ auth->authorizer_buf_len = au->buf_len;
+ auth->authorizer_reply_buf = au->reply_buf;
+ auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
+
return 0;

bad2:
diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c
index 1587dc6..a16bf14 100644
--- a/net/ceph/auth_x.c
+++ b/net/ceph/auth_x.c
@@ -526,9 +526,7 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,

static int ceph_x_create_authorizer(
struct ceph_auth_client *ac, int peer_type,
- struct ceph_authorizer **a,
- void **buf, size_t *len,
- void **reply_buf, size_t *reply_len)
+ struct ceph_auth_handshake *auth)
{
struct ceph_x_authorizer *au;
struct ceph_x_ticket_handler *th;
@@ -548,11 +546,12 @@ static int ceph_x_create_authorizer(
return ret;
}

- *a = (struct ceph_authorizer *)au;
- *buf = au->buf->vec.iov_base;
- *len = au->buf->vec.iov_len;
- *reply_buf = au->reply_buf;
- *reply_len = sizeof(au->reply_buf);
+ auth->authorizer = (struct ceph_authorizer *) au;
+ auth->authorizer_buf = au->buf->vec.iov_base;
+ auth->authorizer_buf_len = au->buf->vec.iov_len;
+ auth->authorizer_reply_buf = au->reply_buf;
+ auth->authorizer_reply_buf_len = sizeof (au->reply_buf);
+
return 0;
}

diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index cc91319..8e74e8c 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -83,7 +83,6 @@ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid)
return -1;
}
} else {
- pr_info("client%lld fsid %pU\n", ceph_client_id(client), fsid);
memcpy(&client->fsid, fsid, sizeof(*fsid));
}
return 0;
@@ -468,19 +467,15 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,
/* msgr */
if (ceph_test_opt(client, MYIP))
myaddr = &client->options->my_addr;
- client->msgr = ceph_messenger_create(myaddr,
- client->supported_features,
- client->required_features);
- if (IS_ERR(client->msgr)) {
- err = PTR_ERR(client->msgr);
- goto fail;
- }
- client->msgr->nocrc = ceph_test_opt(client, NOCRC);
+ ceph_messenger_init(&client->msgr, myaddr,
+ client->supported_features,
+ client->required_features,
+ ceph_test_opt(client, NOCRC));

/* subsystems */
err = ceph_monc_init(&client->monc, client);
if (err < 0)
- goto fail_msgr;
+ goto fail;
err = ceph_osdc_init(&client->osdc, client);
if (err < 0)
goto fail_monc;
@@ -489,8 +484,6 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private,

fail_monc:
ceph_monc_stop(&client->monc);
-fail_msgr:
- ceph_messenger_destroy(client->msgr);
fail:
kfree(client);
return ERR_PTR(err);
@@ -501,22 +494,15 @@ void ceph_destroy_client(struct ceph_client *client)
{
dout("destroy_client %p\n", client);

+ atomic_set(&client->msgr.stopping, 1);
+
/* unmount */
ceph_osdc_stop(&client->osdc);

- /*
- * make sure osd connections close out before destroying the
- * auth module, which is needed to free those connections'
- * ceph_authorizers.
- */
- ceph_msgr_flush();
-
ceph_monc_stop(&client->monc);

ceph_debugfs_client_cleanup(client);

- ceph_messenger_destroy(client->msgr);
-
ceph_destroy_options(client->options);

kfree(client);
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index d6ebb13..fbda052 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -26,9 +26,9 @@ const char *crush_bucket_alg_name(int alg)
* @b: bucket pointer
* @p: item index in bucket
*/
-int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
+int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
{
- if (p >= b->size)
+ if ((__u32)p >= b->size)
return 0;

switch (b->alg) {
@@ -37,9 +37,7 @@ int crush_get_bucket_item_weight(struct crush_bucket *b, int p)
case CRUSH_BUCKET_LIST:
return ((struct crush_bucket_list *)b)->item_weights[p];
case CRUSH_BUCKET_TREE:
- if (p & 1)
- return ((struct crush_bucket_tree *)b)->node_weights[p];
- return 0;
+ return ((struct crush_bucket_tree *)b)->node_weights[crush_calc_tree_node(p)];
case CRUSH_BUCKET_STRAW:
return ((struct crush_bucket_straw *)b)->item_weights[p];
}
@@ -87,6 +85,8 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)

void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
{
+ kfree(b->h.perm);
+ kfree(b->h.items);
kfree(b->node_weights);
kfree(b);
}
@@ -124,10 +124,9 @@ void crush_destroy_bucket(struct crush_bucket *b)
*/
void crush_destroy(struct crush_map *map)
{
- int b;
-
/* buckets */
if (map->buckets) {
+ __s32 b;
for (b = 0; b < map->max_buckets; b++) {
if (map->buckets[b] == NULL)
continue;
@@ -138,6 +137,7 @@ void crush_destroy(struct crush_map *map)

/* rules */
if (map->rules) {
+ __u32 b;
for (b = 0; b < map->max_rules; b++)
kfree(map->rules[b]);
kfree(map->rules);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index b79747c..00baad5 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -32,9 +32,9 @@
* @type: storage ruleset type (user defined)
* @size: output set size
*/
-int crush_find_rule(struct crush_map *map, int ruleset, int type, int size)
+int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
{
- int i;
+ __u32 i;

for (i = 0; i < map->max_rules; i++) {
if (map->rules[i] &&
@@ -72,7 +72,7 @@ static int bucket_perm_choose(struct crush_bucket *bucket,
unsigned i, s;

/* start a new permutation if @x has changed */
- if (bucket->perm_x != x || bucket->perm_n == 0) {
+ if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
dprintk("bucket %d new x=%d\n", bucket->id, x);
bucket->perm_x = x;

@@ -152,8 +152,8 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
return bucket->h.items[i];
}

- BUG_ON(1);
- return 0;
+ dprintk("bad list sums for bucket %d\n", bucket->h.id);
+ return bucket->h.items[0];
}


@@ -219,7 +219,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket,
static int bucket_straw_choose(struct crush_bucket_straw *bucket,
int x, int r)
{
- int i;
+ __u32 i;
int high = 0;
__u64 high_draw = 0;
__u64 draw;
@@ -239,6 +239,7 @@ static int bucket_straw_choose(struct crush_bucket_straw *bucket,
static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
{
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
+ BUG_ON(in->size == 0);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
return bucket_uniform_choose((struct crush_bucket_uniform *)in,
@@ -253,7 +254,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
return bucket_straw_choose((struct crush_bucket_straw *)in,
x, r);
default:
- BUG_ON(1);
+ dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
return in->items[0];
}
}
@@ -262,7 +263,7 @@ static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
* true if device is marked "out" (failed, fully offloaded)
* of the cluster
*/
-static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
+static int is_out(const struct crush_map *map, const __u32 *weight, int item, int x)
{
if (weight[item] >= 0x10000)
return 0;
@@ -287,16 +288,16 @@ static int is_out(struct crush_map *map, __u32 *weight, int item, int x)
* @recurse_to_leaf: true if we want one device under each item of given type
* @out2: second output vector for leaf items (if @recurse_to_leaf)
*/
-static int crush_choose(struct crush_map *map,
+static int crush_choose(const struct crush_map *map,
struct crush_bucket *bucket,
- __u32 *weight,
+ const __u32 *weight,
int x, int numrep, int type,
int *out, int outpos,
int firstn, int recurse_to_leaf,
int *out2)
{
int rep;
- int ftotal, flocal;
+ unsigned int ftotal, flocal;
int retry_descent, retry_bucket, skip_rep;
struct crush_bucket *in = bucket;
int r;
@@ -304,7 +305,7 @@ static int crush_choose(struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
- const int orig_tries = 5; /* attempts before we fall back to search */
+ const unsigned int orig_tries = 5; /* attempts before we fall back to search */

dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
bucket->id, x, outpos, numrep);
@@ -325,7 +326,7 @@ static int crush_choose(struct crush_map *map,
r = rep;
if (in->alg == CRUSH_BUCKET_UNIFORM) {
/* be careful */
- if (firstn || numrep >= in->size)
+ if (firstn || (__u32)numrep >= in->size)
/* r' = r + f_total */
r += ftotal;
else if (in->size % numrep == 0)
@@ -354,7 +355,11 @@ static int crush_choose(struct crush_map *map,
item = bucket_perm_choose(in, x, r);
else
item = crush_bucket_choose(in, x, r);
- BUG_ON(item >= map->max_devices);
+ if (item >= map->max_devices) {
+ dprintk(" bad item %d\n", item);
+ skip_rep = 1;
+ break;
+ }

/* desired type? */
if (item < 0)
@@ -365,8 +370,12 @@ static int crush_choose(struct crush_map *map,

/* keep going? */
if (itemtype != type) {
- BUG_ON(item >= 0 ||
- (-1-item) >= map->max_buckets);
+ if (item >= 0 ||
+ (-1-item) >= map->max_buckets) {
+ dprintk(" bad item type %d\n", type);
+ skip_rep = 1;
+ break;
+ }
in = map->buckets[-1-item];
retry_bucket = 1;
continue;
@@ -415,7 +424,7 @@ reject:
if (collide && flocal < 3)
/* retry locally a few times */
retry_bucket = 1;
- else if (flocal < in->size + orig_tries)
+ else if (flocal <= in->size + orig_tries)
/* exhaustive bucket search */
retry_bucket = 1;
else if (ftotal < 20)
@@ -425,7 +434,7 @@ reject:
/* else give up */
skip_rep = 1;
dprintk(" reject %d collide %d "
- "ftotal %d flocal %d\n",
+ "ftotal %u flocal %u\n",
reject, collide, ftotal,
flocal);
}
@@ -456,9 +465,9 @@ reject:
* @result_max: maximum result size
* @force: force initial replica choice; -1 for none
*/
-int crush_do_rule(struct crush_map *map,
+int crush_do_rule(const struct crush_map *map,
int ruleno, int x, int *result, int result_max,
- int force, __u32 *weight)
+ int force, const __u32 *weight)
{
int result_len;
int force_context[CRUSH_MAX_DEPTH];
@@ -473,12 +482,15 @@ int crush_do_rule(struct crush_map *map,
int osize;
int *tmp;
struct crush_rule *rule;
- int step;
+ __u32 step;
int i, j;
int numrep;
int firstn;

- BUG_ON(ruleno >= map->max_rules);
+ if ((__u32)ruleno >= map->max_rules) {
+ dprintk(" bad ruleno %d\n", ruleno);
+ return 0;
+ }

rule = map->rules[ruleno];
result_len = 0;
@@ -488,7 +500,8 @@ int crush_do_rule(struct crush_map *map,
/*
* determine hierarchical context of force, if any. note
* that this may or may not correspond to the specific types
- * referenced by the crush rule.
+ * referenced by the crush rule. it will also only affect
+ * the first descent (TAKE).
*/
if (force >= 0 &&
force < map->max_devices &&
@@ -527,7 +540,8 @@ int crush_do_rule(struct crush_map *map,
firstn = 1;
case CRUSH_RULE_CHOOSE_LEAF_INDEP:
case CRUSH_RULE_CHOOSE_INDEP:
- BUG_ON(wsize == 0);
+ if (wsize == 0)
+ break;

recurse_to_leaf =
rule->steps[step].op ==
@@ -596,7 +610,9 @@ int crush_do_rule(struct crush_map *map,
break;

default:
- BUG_ON(1);
+ dprintk(" unknown op %d at step %d\n",
+ curstep->op, step);
+ break;
}
}
return result_len;
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index b780cb7..9da7fdd 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -466,6 +466,7 @@ void ceph_key_destroy(struct key *key) {
struct ceph_crypto_key *ckey = key->payload.data;

ceph_crypto_key_destroy(ckey);
+ kfree(ckey);
}

struct key_type key_type_ceph = {
diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h
index 1919d15..3572dc5 100644
--- a/net/ceph/crypto.h
+++ b/net/ceph/crypto.h
@@ -16,7 +16,8 @@ struct ceph_crypto_key {

static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key)
{
- kfree(key->key);
+ if (key)
+ kfree(key->key);
}

extern int ceph_crypto_key_clone(struct ceph_crypto_key *dst,
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 27d4ea3..680978d 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -189,6 +189,9 @@ int ceph_debugfs_client_init(struct ceph_client *client)
snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid,
client->monc.auth->global_id);

+ dout("ceph_debugfs_client_init %p %s\n", client, name);
+
+ BUG_ON(client->debugfs_dir);
client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir);
if (!client->debugfs_dir)
goto out;
@@ -234,6 +237,7 @@ out:

void ceph_debugfs_client_cleanup(struct ceph_client *client)
{
+ dout("ceph_debugfs_client_cleanup %p\n", client);
debugfs_remove(client->debugfs_osdmap);
debugfs_remove(client->debugfs_monmap);
debugfs_remove(client->osdc.debugfs_file);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index f0993af..aa71a67 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -29,6 +29,74 @@
* the sender.
*/

+/*
+ * We track the state of the socket on a given connection using
+ * values defined below. The transition to a new socket state is
+ * handled by a function which verifies we aren't coming from an
+ * unexpected state.
+ *
+ *      --------
+ *      | NEW* |  transient initial state
+ *      --------
+ *          | con_sock_state_init()
+ *          v
+ *      ----------
+ *      | CLOSED |  initialized, but no socket (and no
+ *      ----------  TCP connection)
+ *       ^      \
+ *       |       \ con_sock_state_connecting()
+ *       |        ----------------------
+ *       |                              \
+ *       + con_sock_state_closed()       \
+ *       |+---------------------------    \
+ *       | \                          \    \
+ *       |  -----------                \    \
+ *       |  | CLOSING |  socket event;  \    \
+ *       |  -----------  await close     \    \
+ *       |       ^                        \   |
+ *       |       |                         \  |
+ *       |       + con_sock_state_closing() \ |
+ *       |      / \                         | |
+ *       |     /   ---------------          | |
+ *       |    /                   \         v v
+ *       |   /                    --------------
+ *       |  /    -----------------| CONNECTING |  socket created, TCP
+ *       |  |   /                 --------------  connect initiated
+ *       |  |   | con_sock_state_connected()
+ *       |  |   v
+ *      -------------
+ *      | CONNECTED |  TCP connection established
+ *      -------------
+ *
+ * State values for ceph_connection->sock_state; NEW is assumed to be 0.
+ */
+
+#define CON_SOCK_STATE_NEW 0 /* -> CLOSED */
+#define CON_SOCK_STATE_CLOSED 1 /* -> CONNECTING */
+#define CON_SOCK_STATE_CONNECTING 2 /* -> CONNECTED or -> CLOSING */
+#define CON_SOCK_STATE_CONNECTED 3 /* -> CLOSING or -> CLOSED */
+#define CON_SOCK_STATE_CLOSING 4 /* -> CLOSED */
+
+/*
+ * connection states
+ */
+#define CON_STATE_CLOSED 1 /* -> PREOPEN */
+#define CON_STATE_PREOPEN 2 /* -> CONNECTING, CLOSED */
+#define CON_STATE_CONNECTING 3 /* -> NEGOTIATING, CLOSED */
+#define CON_STATE_NEGOTIATING 4 /* -> OPEN, CLOSED */
+#define CON_STATE_OPEN 5 /* -> STANDBY, CLOSED */
+#define CON_STATE_STANDBY 6 /* -> PREOPEN, CLOSED */
+
+/*
+ * ceph_connection flag bits
+ */
+#define CON_FLAG_LOSSYTX 0 /* we can close channel or drop
+ * messages on errors */
+#define CON_FLAG_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
+#define CON_FLAG_WRITE_PENDING 2 /* we have data ready to send */
+#define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */
+#define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */
+
/* static tag bytes (protocol control messages) */
static char tag_msg = CEPH_MSGR_TAG_MSG;
static char tag_ack = CEPH_MSGR_TAG_ACK;
@@ -147,72 +215,130 @@ void ceph_msgr_flush(void)
}
EXPORT_SYMBOL(ceph_msgr_flush);

+/* Connection socket state transition functions */
+
+static void con_sock_state_init(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_NEW))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSED);
+}
+
+static void con_sock_state_connecting(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CLOSED))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CONNECTING);
+}
+
+static void con_sock_state_connected(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CONNECTED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CONNECTED);
+}
+
+static void con_sock_state_closing(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSING);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTING &&
+ old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSING);
+}
+
+static void con_sock_state_closed(struct ceph_connection *con)
+{
+ int old_state;
+
+ old_state = atomic_xchg(&con->sock_state, CON_SOCK_STATE_CLOSED);
+ if (WARN_ON(old_state != CON_SOCK_STATE_CONNECTED &&
+ old_state != CON_SOCK_STATE_CLOSING &&
+ old_state != CON_SOCK_STATE_CONNECTING &&
+ old_state != CON_SOCK_STATE_CLOSED))
+ printk("%s: unexpected old state %d\n", __func__, old_state);
+ dout("%s con %p sock %d -> %d\n", __func__, con, old_state,
+ CON_SOCK_STATE_CLOSED);
+}

/*
* socket callback functions
*/

/* data available on socket, or listen socket received a connect */
-static void ceph_data_ready(struct sock *sk, int count_unused)
+static void ceph_sock_data_ready(struct sock *sk, int count_unused)
{
struct ceph_connection *con = sk->sk_user_data;
+ if (atomic_read(&con->msgr->stopping)) {
+ return;
+ }

if (sk->sk_state != TCP_CLOSE_WAIT) {
- dout("ceph_data_ready on %p state = %lu, queueing work\n",
+ dout("%s on %p state = %lu, queueing work\n", __func__,
con, con->state);
queue_con(con);
}
}

/* socket has buffer space for writing */
-static void ceph_write_space(struct sock *sk)
+static void ceph_sock_write_space(struct sock *sk)
{
struct ceph_connection *con = sk->sk_user_data;

/* only queue to workqueue if there is data we want to write,
* and there is sufficient space in the socket buffer to accept
- * more data. clear SOCK_NOSPACE so that ceph_write_space()
+ * more data. clear SOCK_NOSPACE so that ceph_sock_write_space()
* doesn't get called again until try_write() fills the socket
* buffer. See net/ipv4/tcp_input.c:tcp_check_space()
* and net/core/stream.c:sk_stream_write_space().
*/
- if (test_bit(WRITE_PENDING, &con->state)) {
+ if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) {
if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
- dout("ceph_write_space %p queueing write work\n", con);
+ dout("%s %p queueing write work\n", __func__, con);
clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
queue_con(con);
}
} else {
- dout("ceph_write_space %p nothing to write\n", con);
+ dout("%s %p nothing to write\n", __func__, con);
}
}

/* socket's state has changed */
-static void ceph_state_change(struct sock *sk)
+static void ceph_sock_state_change(struct sock *sk)
{
struct ceph_connection *con = sk->sk_user_data;

- dout("ceph_state_change %p state = %lu sk_state = %u\n",
+ dout("%s %p state = %lu sk_state = %u\n", __func__,
con, con->state, sk->sk_state);

- if (test_bit(CLOSED, &con->state))
- return;
-
switch (sk->sk_state) {
case TCP_CLOSE:
- dout("ceph_state_change TCP_CLOSE\n");
+ dout("%s TCP_CLOSE\n", __func__);
case TCP_CLOSE_WAIT:
- dout("ceph_state_change TCP_CLOSE_WAIT\n");
- if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) {
- if (test_bit(CONNECTING, &con->state))
- con->error_msg = "connection failed";
- else
- con->error_msg = "socket closed";
- queue_con(con);
- }
+ dout("%s TCP_CLOSE_WAIT\n", __func__);
+ con_sock_state_closing(con);
+ set_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
+ queue_con(con);
break;
case TCP_ESTABLISHED:
- dout("ceph_state_change TCP_ESTABLISHED\n");
+ dout("%s TCP_ESTABLISHED\n", __func__);
+ con_sock_state_connected(con);
queue_con(con);
break;
default: /* Everything else is uninteresting */
@@ -228,9 +354,9 @@ static void set_sock_callbacks(struct socket *sock,
{
struct sock *sk = sock->sk;
sk->sk_user_data = con;
- sk->sk_data_ready = ceph_data_ready;
- sk->sk_write_space = ceph_write_space;
- sk->sk_state_change = ceph_state_change;
+ sk->sk_data_ready = ceph_sock_data_ready;
+ sk->sk_write_space = ceph_sock_write_space;
+ sk->sk_state_change = ceph_sock_state_change;
}


@@ -262,6 +388,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)

dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));

+ con_sock_state_connecting(con);
ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
O_NONBLOCK);
if (ret == -EINPROGRESS) {
@@ -277,7 +404,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
return ret;
}
con->sock = sock;
-
return 0;
}

@@ -333,16 +459,24 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
*/
static int con_close_socket(struct ceph_connection *con)
{
- int rc;
+ int rc = 0;

dout("con_close_socket on %p sock %p\n", con, con->sock);
- if (!con->sock)
- return 0;
- set_bit(SOCK_CLOSED, &con->state);
- rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
- sock_release(con->sock);
- con->sock = NULL;
- clear_bit(SOCK_CLOSED, &con->state);
+ if (con->sock) {
+ rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR);
+ sock_release(con->sock);
+ con->sock = NULL;
+ }
+
+ /*
+ * Forcibly clear the SOCK_CLOSED flag. It gets set
+ * independent of the connection mutex, and we could have
+ * received a socket close event before we had the chance to
+ * shut the socket down.
+ */
+ clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags);
+
+ con_sock_state_closed(con);
return rc;
}

@@ -353,6 +487,10 @@ static int con_close_socket(struct ceph_connection *con)
static void ceph_msg_remove(struct ceph_msg *msg)
{
list_del_init(&msg->list_head);
+ BUG_ON(msg->con == NULL);
+ msg->con->ops->put(msg->con);
+ msg->con = NULL;
+
ceph_msg_put(msg);
}
static void ceph_msg_remove_list(struct list_head *head)
@@ -372,8 +510,11 @@ static void reset_connection(struct ceph_connection *con)
ceph_msg_remove_list(&con->out_sent);

if (con->in_msg) {
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
+ con->ops->put(con);
}

con->connect_seq = 0;
@@ -391,32 +532,44 @@ static void reset_connection(struct ceph_connection *con)
*/
void ceph_con_close(struct ceph_connection *con)
{
+ mutex_lock(&con->mutex);
dout("con_close %p peer %s\n", con,
ceph_pr_addr(&con->peer_addr.in_addr));
- set_bit(CLOSED, &con->state); /* in case there's queued work */
- clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */
- clear_bit(LOSSYTX, &con->state); /* so we retry next connect */
- clear_bit(KEEPALIVE_PENDING, &con->state);
- clear_bit(WRITE_PENDING, &con->state);
- mutex_lock(&con->mutex);
+ con->state = CON_STATE_CLOSED;
+
+ clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */
+ clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
+ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags);
+ clear_bit(CON_FLAG_BACKOFF, &con->flags);
+
reset_connection(con);
con->peer_global_seq = 0;
cancel_delayed_work(&con->work);
+ con_close_socket(con);
mutex_unlock(&con->mutex);
- queue_con(con);
}
EXPORT_SYMBOL(ceph_con_close);

/*
* Reopen a closed connection, with a new peer address.
*/
-void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr)
+void ceph_con_open(struct ceph_connection *con,
+ __u8 entity_type, __u64 entity_num,
+ struct ceph_entity_addr *addr)
{
+ mutex_lock(&con->mutex);
dout("con_open %p %s\n", con, ceph_pr_addr(&addr->in_addr));
- set_bit(OPENING, &con->state);
- clear_bit(CLOSED, &con->state);
+
+ BUG_ON(con->state != CON_STATE_CLOSED);
+ con->state = CON_STATE_PREOPEN;
+
+ con->peer_name.type = (__u8) entity_type;
+ con->peer_name.num = cpu_to_le64(entity_num);
+
memcpy(&con->peer_addr, addr, sizeof(*addr));
con->delay = 0; /* reset backoff memory */
+ mutex_unlock(&con->mutex);
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_open);
@@ -430,42 +583,26 @@ bool ceph_con_opened(struct ceph_connection *con)
}

/*
- * generic get/put
- */
-struct ceph_connection *ceph_con_get(struct ceph_connection *con)
-{
- int nref = __atomic_add_unless(&con->nref, 1, 0);
-
- dout("con_get %p nref = %d -> %d\n", con, nref, nref + 1);
-
- return nref ? con : NULL;
-}
-
-void ceph_con_put(struct ceph_connection *con)
-{
- int nref = atomic_dec_return(&con->nref);
-
- BUG_ON(nref < 0);
- if (nref == 0) {
- BUG_ON(con->sock);
- kfree(con);
- }
- dout("con_put %p nref = %d -> %d\n", con, nref + 1, nref);
-}
-
-/*
* initialize a new connection.
*/
-void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con)
+void ceph_con_init(struct ceph_connection *con, void *private,
+ const struct ceph_connection_operations *ops,
+ struct ceph_messenger *msgr)
{
dout("con_init %p\n", con);
memset(con, 0, sizeof(*con));
- atomic_set(&con->nref, 1);
+ con->private = private;
+ con->ops = ops;
con->msgr = msgr;
+
+ con_sock_state_init(con);
+
mutex_init(&con->mutex);
INIT_LIST_HEAD(&con->out_queue);
INIT_LIST_HEAD(&con->out_sent);
INIT_DELAYED_WORK(&con->work, con_work);
+
+ con->state = CON_STATE_CLOSED;
}
EXPORT_SYMBOL(ceph_con_init);

@@ -486,14 +623,14 @@ static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt)
return ret;
}

-static void ceph_con_out_kvec_reset(struct ceph_connection *con)
+static void con_out_kvec_reset(struct ceph_connection *con)
{
con->out_kvec_left = 0;
con->out_kvec_bytes = 0;
con->out_kvec_cur = &con->out_kvec[0];
}

-static void ceph_con_out_kvec_add(struct ceph_connection *con,
+static void con_out_kvec_add(struct ceph_connection *con,
size_t size, void *data)
{
int index;
@@ -507,6 +644,53 @@ static void ceph_con_out_kvec_add(struct ceph_connection *con,
con->out_kvec_bytes += size;
}

+#ifdef CONFIG_BLOCK
+static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+{
+ if (!bio) {
+ *iter = NULL;
+ *seg = 0;
+ return;
+ }
+ *iter = bio;
+ *seg = bio->bi_idx;
+}
+
+static void iter_bio_next(struct bio **bio_iter, int *seg)
+{
+ if (*bio_iter == NULL)
+ return;
+
+ BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+
+ (*seg)++;
+ if (*seg == (*bio_iter)->bi_vcnt)
+ init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
+}
+#endif
+
+static void prepare_write_message_data(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->out_msg;
+
+ BUG_ON(!msg);
+ BUG_ON(!msg->hdr.data_len);
+
+ /* initialize page iterator */
+ con->out_msg_pos.page = 0;
+ if (msg->pages)
+ con->out_msg_pos.page_pos = msg->page_alignment;
+ else
+ con->out_msg_pos.page_pos = 0;
+#ifdef CONFIG_BLOCK
+ if (msg->bio)
+ init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
+#endif
+ con->out_msg_pos.data_pos = 0;
+ con->out_msg_pos.did_page_crc = false;
+ con->out_more = 1; /* data + footer will follow */
+}
+
/*
* Prepare footer for currently outgoing message, and finish things
* off. Assumes out_kvec* are already valid.. we just add on to the end.
@@ -516,6 +700,8 @@ static void prepare_write_message_footer(struct ceph_connection *con)
struct ceph_msg *m = con->out_msg;
int v = con->out_kvec_left;

+ m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
+
dout("prepare_write_message_footer %p\n", con);
con->out_kvec_is_msg = true;
con->out_kvec[v].iov_base = &m->footer;
@@ -534,7 +720,7 @@ static void prepare_write_message(struct ceph_connection *con)
struct ceph_msg *m;
u32 crc;

- ceph_con_out_kvec_reset(con);
+ con_out_kvec_reset(con);
con->out_kvec_is_msg = true;
con->out_msg_done = false;

@@ -542,14 +728,16 @@ static void prepare_write_message(struct ceph_connection *con)
* TCP packet that's a good thing. */
if (con->in_seq > con->in_seq_acked) {
con->in_seq_acked = con->in_seq;
- ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
- ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
&con->out_temp_ack);
}

+ BUG_ON(list_empty(&con->out_queue));
m = list_first_entry(&con->out_queue, struct ceph_msg, list_head);
con->out_msg = m;
+ BUG_ON(m->con != con);

/* put message on sent list */
ceph_msg_get(m);
@@ -572,18 +760,18 @@ static void prepare_write_message(struct ceph_connection *con)
BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len);

/* tag + hdr + front + middle */
- ceph_con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
- ceph_con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
- ceph_con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
+ con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
+ con_out_kvec_add(con, sizeof (m->hdr), &m->hdr);
+ con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);

if (m->middle)
- ceph_con_out_kvec_add(con, m->middle->vec.iov_len,
+ con_out_kvec_add(con, m->middle->vec.iov_len,
m->middle->vec.iov_base);

/* fill in crc (except data pages), footer */
crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
con->out_msg->hdr.crc = cpu_to_le32(crc);
- con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE;
+ con->out_msg->footer.flags = 0;

crc = crc32c(0, m->front.iov_base, m->front.iov_len);
con->out_msg->footer.front_crc = cpu_to_le32(crc);
@@ -593,28 +781,19 @@ static void prepare_write_message(struct ceph_connection *con)
con->out_msg->footer.middle_crc = cpu_to_le32(crc);
} else
con->out_msg->footer.middle_crc = 0;
- con->out_msg->footer.data_crc = 0;
- dout("prepare_write_message front_crc %u data_crc %u\n",
+ dout("%s front_crc %u middle_crc %u\n", __func__,
le32_to_cpu(con->out_msg->footer.front_crc),
le32_to_cpu(con->out_msg->footer.middle_crc));

/* is there a data payload? */
- if (le32_to_cpu(m->hdr.data_len) > 0) {
- /* initialize page iterator */
- con->out_msg_pos.page = 0;
- if (m->pages)
- con->out_msg_pos.page_pos = m->page_alignment;
- else
- con->out_msg_pos.page_pos = 0;
- con->out_msg_pos.data_pos = 0;
- con->out_msg_pos.did_page_crc = false;
- con->out_more = 1; /* data + footer will follow */
- } else {
+ con->out_msg->footer.data_crc = 0;
+ if (m->hdr.data_len)
+ prepare_write_message_data(con);
+ else
/* no, queue up footer too and be done */
prepare_write_message_footer(con);
- }

- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}

/*
@@ -626,16 +805,16 @@ static void prepare_write_ack(struct ceph_connection *con)
con->in_seq_acked, con->in_seq);
con->in_seq_acked = con->in_seq;

- ceph_con_out_kvec_reset(con);
+ con_out_kvec_reset(con);

- ceph_con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
+ con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);

con->out_temp_ack = cpu_to_le64(con->in_seq_acked);
- ceph_con_out_kvec_add(con, sizeof (con->out_temp_ack),
+ con_out_kvec_add(con, sizeof (con->out_temp_ack),
&con->out_temp_ack);

con->out_more = 1; /* more will follow.. eventually.. */
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}

/*
@@ -644,63 +823,60 @@ static void prepare_write_ack(struct ceph_connection *con)
static void prepare_write_keepalive(struct ceph_connection *con)
{
dout("prepare_write_keepalive %p\n", con);
- ceph_con_out_kvec_reset(con);
- ceph_con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
- set_bit(WRITE_PENDING, &con->state);
+ con_out_kvec_reset(con);
+ con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}

/*
* Connection negotiation.
*/

-static int prepare_connect_authorizer(struct ceph_connection *con)
+static struct ceph_auth_handshake *get_connect_authorizer(struct ceph_connection *con,
+ int *auth_proto)
{
- void *auth_buf;
- int auth_len = 0;
- int auth_protocol = 0;
+ struct ceph_auth_handshake *auth;
+
+ if (!con->ops->get_authorizer) {
+ con->out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
+ con->out_connect.authorizer_len = 0;
+ return NULL;
+ }

+ /* Can't hold the mutex while getting authorizer */
mutex_unlock(&con->mutex);
- if (con->ops->get_authorizer)
- con->ops->get_authorizer(con, &auth_buf, &auth_len,
- &auth_protocol, &con->auth_reply_buf,
- &con->auth_reply_buf_len,
- con->auth_retry);
+ auth = con->ops->get_authorizer(con, auth_proto, con->auth_retry);
mutex_lock(&con->mutex);

- if (test_bit(CLOSED, &con->state) ||
- test_bit(OPENING, &con->state))
- return -EAGAIN;
+ if (IS_ERR(auth))
+ return auth;
+ if (con->state != CON_STATE_NEGOTIATING)
+ return ERR_PTR(-EAGAIN);

- con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
- con->out_connect.authorizer_len = cpu_to_le32(auth_len);
-
- if (auth_len)
- ceph_con_out_kvec_add(con, auth_len, auth_buf);
-
- return 0;
+ con->auth_reply_buf = auth->authorizer_reply_buf;
+ con->auth_reply_buf_len = auth->authorizer_reply_buf_len;
+ return auth;
}

/*
* We connected to a peer and are saying hello.
*/
-static void prepare_write_banner(struct ceph_messenger *msgr,
- struct ceph_connection *con)
+static void prepare_write_banner(struct ceph_connection *con)
{
- ceph_con_out_kvec_reset(con);
- ceph_con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
- ceph_con_out_kvec_add(con, sizeof (msgr->my_enc_addr),
- &msgr->my_enc_addr);
+ con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
+ con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
+ &con->msgr->my_enc_addr);

con->out_more = 0;
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);
}

-static int prepare_write_connect(struct ceph_messenger *msgr,
- struct ceph_connection *con,
- int include_banner)
+static int prepare_write_connect(struct ceph_connection *con)
{
unsigned global_seq = get_global_seq(con->msgr, 0);
int proto;
+ int auth_proto;
+ struct ceph_auth_handshake *auth;

switch (con->peer_name.type) {
case CEPH_ENTITY_TYPE_MON:
@@ -719,23 +895,32 @@ static int prepare_write_connect(struct ceph_messenger *msgr,
dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
con->connect_seq, global_seq, proto);

- con->out_connect.features = cpu_to_le64(msgr->supported_features);
+ con->out_connect.features = cpu_to_le64(con->msgr->supported_features);
con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
con->out_connect.connect_seq = cpu_to_le32(con->connect_seq);
con->out_connect.global_seq = cpu_to_le32(global_seq);
con->out_connect.protocol_version = cpu_to_le32(proto);
con->out_connect.flags = 0;

- if (include_banner)
- prepare_write_banner(msgr, con);
- else
- ceph_con_out_kvec_reset(con);
- ceph_con_out_kvec_add(con, sizeof (con->out_connect), &con->out_connect);
+ auth_proto = CEPH_AUTH_UNKNOWN;
+ auth = get_connect_authorizer(con, &auth_proto);
+ if (IS_ERR(auth))
+ return PTR_ERR(auth);
+
+ con->out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
+ con->out_connect.authorizer_len = auth ?
+ cpu_to_le32(auth->authorizer_buf_len) : 0;
+
+ con_out_kvec_add(con, sizeof (con->out_connect),
+ &con->out_connect);
+ if (auth && auth->authorizer_buf_len)
+ con_out_kvec_add(con, auth->authorizer_buf_len,
+ auth->authorizer_buf);

con->out_more = 0;
- set_bit(WRITE_PENDING, &con->state);
+ set_bit(CON_FLAG_WRITE_PENDING, &con->flags);

- return prepare_connect_authorizer(con);
+ return 0;
}

/*
@@ -781,30 +966,34 @@ out:
return ret; /* done! */
}

-#ifdef CONFIG_BLOCK
-static void init_bio_iter(struct bio *bio, struct bio **iter, int *seg)
+static void out_msg_pos_next(struct ceph_connection *con, struct page *page,
+ size_t len, size_t sent, bool in_trail)
{
- if (!bio) {
- *iter = NULL;
- *seg = 0;
- return;
- }
- *iter = bio;
- *seg = bio->bi_idx;
-}
+ struct ceph_msg *msg = con->out_msg;

-static void iter_bio_next(struct bio **bio_iter, int *seg)
-{
- if (*bio_iter == NULL)
- return;
+ BUG_ON(!msg);
+ BUG_ON(!sent);

- BUG_ON(*seg >= (*bio_iter)->bi_vcnt);
+ con->out_msg_pos.data_pos += sent;
+ con->out_msg_pos.page_pos += sent;
+ if (sent < len)
+ return;

- (*seg)++;
- if (*seg == (*bio_iter)->bi_vcnt)
- init_bio_iter((*bio_iter)->bi_next, bio_iter, seg);
-}
+ BUG_ON(sent != len);
+ con->out_msg_pos.page_pos = 0;
+ con->out_msg_pos.page++;
+ con->out_msg_pos.did_page_crc = false;
+ if (in_trail)
+ list_move_tail(&page->lru,
+ &msg->trail->head);
+ else if (msg->pagelist)
+ list_move_tail(&page->lru,
+ &msg->pagelist->head);
+#ifdef CONFIG_BLOCK
+ else if (msg->bio)
+ iter_bio_next(&msg->bio_iter, &msg->bio_seg);
#endif
+}

/*
* Write as much message data payload as we can. If we finish, queue
@@ -821,41 +1010,36 @@ static int write_partial_msg_pages(struct ceph_connection *con)
bool do_datacrc = !con->msgr->nocrc;
int ret;
int total_max_write;
- int in_trail = 0;
- size_t trail_len = (msg->trail ? msg->trail->length : 0);
+ bool in_trail = false;
+ const size_t trail_len = (msg->trail ? msg->trail->length : 0);
+ const size_t trail_off = data_len - trail_len;

dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n",
- con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages,
+ con, msg, con->out_msg_pos.page, msg->nr_pages,
con->out_msg_pos.page_pos);

-#ifdef CONFIG_BLOCK
- if (msg->bio && !msg->bio_iter)
- init_bio_iter(msg->bio, &msg->bio_iter, &msg->bio_seg);
-#endif
-
+ /*
+ * Iterate through each page that contains data to be
+ * written, and send as much as possible for each.
+ *
+ * If we are calculating the data crc (the default), we will
+ * need to map the page. If we have no pages, they have
+ * been revoked, so use the zero page.
+ */
while (data_len > con->out_msg_pos.data_pos) {
struct page *page = NULL;
int max_write = PAGE_SIZE;
int bio_offset = 0;

- total_max_write = data_len - trail_len -
- con->out_msg_pos.data_pos;
-
- /*
- * if we are calculating the data crc (the default), we need
- * to map the page. if our pages[] has been revoked, use the
- * zero page.
- */
-
- /* have we reached the trail part of the data? */
- if (con->out_msg_pos.data_pos >= data_len - trail_len) {
- in_trail = 1;
+ in_trail = in_trail || con->out_msg_pos.data_pos >= trail_off;
+ if (!in_trail)
+ total_max_write = trail_off - con->out_msg_pos.data_pos;

+ if (in_trail) {
total_max_write = data_len - con->out_msg_pos.data_pos;

page = list_first_entry(&msg->trail->head,
struct page, lru);
- max_write = PAGE_SIZE;
} else if (msg->pages) {
page = msg->pages[con->out_msg_pos.page];
} else if (msg->pagelist) {
@@ -878,52 +1062,32 @@ static int write_partial_msg_pages(struct ceph_connection *con)

if (do_datacrc && !con->out_msg_pos.did_page_crc) {
void *base;
- u32 crc;
- u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc);
+ u32 crc = le32_to_cpu(msg->footer.data_crc);
char *kaddr;

kaddr = kmap(page);
BUG_ON(kaddr == NULL);
base = kaddr + con->out_msg_pos.page_pos + bio_offset;
- crc = crc32c(tmpcrc, base, len);
- con->out_msg->footer.data_crc = cpu_to_le32(crc);
+ crc = crc32c(crc, base, len);
+ kunmap(page);
+ msg->footer.data_crc = cpu_to_le32(crc);
con->out_msg_pos.did_page_crc = true;
}
ret = ceph_tcp_sendpage(con->sock, page,
con->out_msg_pos.page_pos + bio_offset,
len, 1);
-
- if (do_datacrc)
- kunmap(page);
-
if (ret <= 0)
goto out;

- con->out_msg_pos.data_pos += ret;
- con->out_msg_pos.page_pos += ret;
- if (ret == len) {
- con->out_msg_pos.page_pos = 0;
- con->out_msg_pos.page++;
- con->out_msg_pos.did_page_crc = false;
- if (in_trail)
- list_move_tail(&page->lru,
- &msg->trail->head);
- else if (msg->pagelist)
- list_move_tail(&page->lru,
- &msg->pagelist->head);
-#ifdef CONFIG_BLOCK
- else if (msg->bio)
- iter_bio_next(&msg->bio_iter, &msg->bio_seg);
-#endif
- }
+ out_msg_pos_next(con, page, len, (size_t) ret, in_trail);
}

dout("write_partial_msg_pages %p msg %p done\n", con, msg);

/* prepare and queue up footer, too */
if (!do_datacrc)
- con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
- ceph_con_out_kvec_reset(con);
+ msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
+ con_out_kvec_reset(con);
prepare_write_message_footer(con);
ret = 1;
out:
@@ -992,11 +1156,10 @@ static int prepare_read_message(struct ceph_connection *con)


static int read_partial(struct ceph_connection *con,
- int *to, int size, void *object)
+ int end, int size, void *object)
{
- *to += size;
- while (con->in_base_pos < *to) {
- int left = *to - con->in_base_pos;
+ while (con->in_base_pos < end) {
+ int left = end - con->in_base_pos;
int have = size - left;
int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
if (ret <= 0)
@@ -1012,37 +1175,52 @@ static int read_partial(struct ceph_connection *con,
*/
static int read_partial_banner(struct ceph_connection *con)
{
- int ret, to = 0;
+ int size;
+ int end;
+ int ret;

dout("read_partial_banner %p at %d\n", con, con->in_base_pos);

/* peer's banner */
- ret = read_partial(con, &to, strlen(CEPH_BANNER), con->in_banner);
+ size = strlen(CEPH_BANNER);
+ end = size;
+ ret = read_partial(con, end, size, con->in_banner);
if (ret <= 0)
goto out;
- ret = read_partial(con, &to, sizeof(con->actual_peer_addr),
- &con->actual_peer_addr);
+
+ size = sizeof (con->actual_peer_addr);
+ end += size;
+ ret = read_partial(con, end, size, &con->actual_peer_addr);
if (ret <= 0)
goto out;
- ret = read_partial(con, &to, sizeof(con->peer_addr_for_me),
- &con->peer_addr_for_me);
+
+ size = sizeof (con->peer_addr_for_me);
+ end += size;
+ ret = read_partial(con, end, size, &con->peer_addr_for_me);
if (ret <= 0)
goto out;
+
out:
return ret;
}

static int read_partial_connect(struct ceph_connection *con)
{
- int ret, to = 0;
+ int size;
+ int end;
+ int ret;

dout("read_partial_connect %p at %d\n", con, con->in_base_pos);

- ret = read_partial(con, &to, sizeof(con->in_reply), &con->in_reply);
+ size = sizeof (con->in_reply);
+ end = size;
+ ret = read_partial(con, end, size, &con->in_reply);
if (ret <= 0)
goto out;
- ret = read_partial(con, &to, le32_to_cpu(con->in_reply.authorizer_len),
- con->auth_reply_buf);
+
+ size = le32_to_cpu(con->in_reply.authorizer_len);
+ end += size;
+ ret = read_partial(con, end, size, con->auth_reply_buf);
if (ret <= 0)
goto out;

@@ -1321,20 +1499,14 @@ static int process_banner(struct ceph_connection *con)
ceph_pr_addr(&con->msgr->inst.addr.in_addr));
}

- set_bit(NEGOTIATING, &con->state);
- prepare_read_connect(con);
return 0;
}

static void fail_protocol(struct ceph_connection *con)
{
reset_connection(con);
- set_bit(CLOSED, &con->state); /* in case there's queued work */
-
- mutex_unlock(&con->mutex);
- if (con->ops->bad_proto)
- con->ops->bad_proto(con);
- mutex_lock(&con->mutex);
+ BUG_ON(con->state != CON_STATE_NEGOTIATING);
+ con->state = CON_STATE_CLOSED;
}

static int process_connect(struct ceph_connection *con)
@@ -1377,7 +1549,8 @@ static int process_connect(struct ceph_connection *con)
return -1;
}
con->auth_retry = 1;
- ret = prepare_write_connect(con->msgr, con, 0);
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
if (ret < 0)
return ret;
prepare_read_connect(con);
@@ -1392,12 +1565,15 @@ static int process_connect(struct ceph_connection *con)
* dropped messages.
*/
dout("process_connect got RESET peer seq %u\n",
- le32_to_cpu(con->in_connect.connect_seq));
+ le32_to_cpu(con->in_reply.connect_seq));
pr_err("%s%lld %s connection reset\n",
ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr));
reset_connection(con);
- prepare_write_connect(con->msgr, con, 0);
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
prepare_read_connect(con);

/* Tell ceph about it. */
@@ -1406,8 +1582,7 @@ static int process_connect(struct ceph_connection *con)
if (con->ops->peer_reset)
con->ops->peer_reset(con);
mutex_lock(&con->mutex);
- if (test_bit(CLOSED, &con->state) ||
- test_bit(OPENING, &con->state))
+ if (con->state != CON_STATE_NEGOTIATING)
return -EAGAIN;
break;

@@ -1416,11 +1591,14 @@ static int process_connect(struct ceph_connection *con)
* If we sent a smaller connect_seq than the peer has, try
* again with a larger value.
*/
- dout("process_connect got RETRY my seq = %u, peer_seq = %u\n",
+ dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
le32_to_cpu(con->out_connect.connect_seq),
- le32_to_cpu(con->in_connect.connect_seq));
- con->connect_seq = le32_to_cpu(con->in_connect.connect_seq);
- prepare_write_connect(con->msgr, con, 0);
+ le32_to_cpu(con->in_reply.connect_seq));
+ con->connect_seq = le32_to_cpu(con->in_reply.connect_seq);
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
prepare_read_connect(con);
break;

@@ -1431,10 +1609,13 @@ static int process_connect(struct ceph_connection *con)
*/
dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
con->peer_global_seq,
- le32_to_cpu(con->in_connect.global_seq));
+ le32_to_cpu(con->in_reply.global_seq));
get_global_seq(con->msgr,
- le32_to_cpu(con->in_connect.global_seq));
- prepare_write_connect(con->msgr, con, 0);
+ le32_to_cpu(con->in_reply.global_seq));
+ con_out_kvec_reset(con);
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ return ret;
prepare_read_connect(con);
break;

@@ -1449,7 +1630,10 @@ static int process_connect(struct ceph_connection *con)
fail_protocol(con);
return -1;
}
- clear_bit(CONNECTING, &con->state);
+
+ BUG_ON(con->state != CON_STATE_NEGOTIATING);
+ con->state = CON_STATE_OPEN;
+
con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
con->connect_seq++;
con->peer_features = server_feat;
@@ -1461,7 +1645,9 @@ static int process_connect(struct ceph_connection *con)
le32_to_cpu(con->in_reply.connect_seq));

if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
- set_bit(LOSSYTX, &con->state);
+ set_bit(CON_FLAG_LOSSYTX, &con->flags);
+
+ con->delay = 0; /* reset backoff memory */

prepare_read_tag(con);
break;
@@ -1491,10 +1677,10 @@ static int process_connect(struct ceph_connection *con)
*/
static int read_partial_ack(struct ceph_connection *con)
{
- int to = 0;
+ int size = sizeof (con->in_temp_ack);
+ int end = size;

- return read_partial(con, &to, sizeof(con->in_temp_ack),
- &con->in_temp_ack);
+ return read_partial(con, end, size, &con->in_temp_ack);
}


@@ -1547,10 +1733,7 @@ static int read_partial_message_section(struct ceph_connection *con,
return 1;
}

-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
- struct ceph_msg_header *hdr,
- int *skip);
-
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip);

static int read_partial_message_pages(struct ceph_connection *con,
struct page **pages,
@@ -1593,9 +1776,6 @@ static int read_partial_message_bio(struct ceph_connection *con,
void *p;
int ret, left;

- if (IS_ERR(bv))
- return PTR_ERR(bv);
-
left = min((int)(data_len - con->in_msg_pos.data_pos),
(int)(bv->bv_len - con->in_msg_pos.page_pos));

@@ -1627,26 +1807,22 @@ static int read_partial_message_bio(struct ceph_connection *con,
static int read_partial_message(struct ceph_connection *con)
{
struct ceph_msg *m = con->in_msg;
+ int size;
+ int end;
int ret;
- int to, left;
unsigned front_len, middle_len, data_len;
bool do_datacrc = !con->msgr->nocrc;
- int skip;
u64 seq;
u32 crc;

dout("read_partial_message con %p msg %p\n", con, m);

/* header */
- while (con->in_base_pos < sizeof(con->in_hdr)) {
- left = sizeof(con->in_hdr) - con->in_base_pos;
- ret = ceph_tcp_recvmsg(con->sock,
- (char *)&con->in_hdr + con->in_base_pos,
- left);
- if (ret <= 0)
- return ret;
- con->in_base_pos += ret;
- }
+ size = sizeof (con->in_hdr);
+ end = size;
+ ret = read_partial(con, end, size, &con->in_hdr);
+ if (ret <= 0)
+ return ret;

crc = crc32c(0, &con->in_hdr, offsetof(struct ceph_msg_header, crc));
if (cpu_to_le32(crc) != con->in_hdr.crc) {
@@ -1686,10 +1862,13 @@ static int read_partial_message(struct ceph_connection *con)

/* allocate message? */
if (!con->in_msg) {
+ int skip = 0;
+
dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
con->in_hdr.front_len, con->in_hdr.data_len);
- skip = 0;
- con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip);
+ ret = ceph_con_in_msg_alloc(con, &skip);
+ if (ret < 0)
+ return ret;
if (skip) {
/* skip this message */
dout("alloc_msg said skip message\n");
@@ -1700,11 +1879,9 @@ static int read_partial_message(struct ceph_connection *con)
con->in_seq++;
return 0;
}
- if (!con->in_msg) {
- con->error_msg =
- "error allocating memory for incoming message";
- return -ENOMEM;
- }
+
+ BUG_ON(!con->in_msg);
+ BUG_ON(con->in_msg->con != con);
m = con->in_msg;
m->front.iov_len = 0; /* haven't read it yet */
if (m->middle)
@@ -1716,6 +1893,11 @@ static int read_partial_message(struct ceph_connection *con)
else
con->in_msg_pos.page_pos = 0;
con->in_msg_pos.data_pos = 0;
+
+#ifdef CONFIG_BLOCK
+ if (m->bio)
+ init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
+#endif
}

/* front */
@@ -1732,10 +1914,6 @@ static int read_partial_message(struct ceph_connection *con)
if (ret <= 0)
return ret;
}
-#ifdef CONFIG_BLOCK
- if (m->bio && !m->bio_iter)
- init_bio_iter(m->bio, &m->bio_iter, &m->bio_seg);
-#endif

/* (page) data */
while (con->in_msg_pos.data_pos < data_len) {
@@ -1746,7 +1924,7 @@ static int read_partial_message(struct ceph_connection *con)
return ret;
#ifdef CONFIG_BLOCK
} else if (m->bio) {
-
+ BUG_ON(!m->bio_iter);
ret = read_partial_message_bio(con,
&m->bio_iter, &m->bio_seg,
data_len, do_datacrc);
@@ -1759,16 +1937,12 @@ static int read_partial_message(struct ceph_connection *con)
}

/* footer */
- to = sizeof(m->hdr) + sizeof(m->footer);
- while (con->in_base_pos < to) {
- left = to - con->in_base_pos;
- ret = ceph_tcp_recvmsg(con->sock, (char *)&m->footer +
- (con->in_base_pos - sizeof(m->hdr)),
- left);
- if (ret <= 0)
- return ret;
- con->in_base_pos += ret;
- }
+ size = sizeof (m->footer);
+ end += size;
+ ret = read_partial(con, end, size, &m->footer);
+ if (ret <= 0)
+ return ret;
+
dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
m, front_len, m->footer.front_crc, middle_len,
m->footer.middle_crc, data_len, m->footer.data_crc);
@@ -1804,8 +1978,11 @@ static void process_message(struct ceph_connection *con)
{
struct ceph_msg *msg;

+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
msg = con->in_msg;
con->in_msg = NULL;
+ con->ops->put(con);

/* if first message, set peer_name */
if (con->peer_name.type == 0)
@@ -1825,7 +2002,6 @@ static void process_message(struct ceph_connection *con)
con->ops->dispatch(con, msg);

mutex_lock(&con->mutex);
- prepare_read_tag(con);
}


@@ -1835,21 +2011,21 @@ static void process_message(struct ceph_connection *con)
*/
static int try_write(struct ceph_connection *con)
{
- struct ceph_messenger *msgr = con->msgr;
int ret = 1;

- dout("try_write start %p state %lu nref %d\n", con, con->state,
- atomic_read(&con->nref));
+ dout("try_write start %p state %lu\n", con, con->state);

more:
dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes);

/* open the socket first? */
- if (con->sock == NULL) {
- prepare_write_connect(msgr, con, 1);
+ if (con->state == CON_STATE_PREOPEN) {
+ BUG_ON(con->sock);
+ con->state = CON_STATE_CONNECTING;
+
+ con_out_kvec_reset(con);
+ prepare_write_banner(con);
prepare_read_banner(con);
- set_bit(CONNECTING, &con->state);
- clear_bit(NEGOTIATING, &con->state);

BUG_ON(con->in_msg);
con->in_tag = CEPH_MSGR_TAG_READY;
@@ -1896,7 +2072,7 @@ more_kvec:
}

do_next:
- if (!test_bit(CONNECTING, &con->state)) {
+ if (con->state == CON_STATE_OPEN) {
/* is anything else pending? */
if (!list_empty(&con->out_queue)) {
prepare_write_message(con);
@@ -1906,14 +2082,15 @@ do_next:
prepare_write_ack(con);
goto more;
}
- if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) {
+ if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING,
+ &con->flags)) {
prepare_write_keepalive(con);
goto more;
}
}

/* Nothing to do! */
- clear_bit(WRITE_PENDING, &con->state);
+ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
dout("try_write nothing else to write.\n");
ret = 0;
out:
@@ -1930,38 +2107,46 @@ static int try_read(struct ceph_connection *con)
{
int ret = -1;

- if (!con->sock)
- return 0;
-
- if (test_bit(STANDBY, &con->state))
+more:
+ dout("try_read start on %p state %lu\n", con, con->state);
+ if (con->state != CON_STATE_CONNECTING &&
+ con->state != CON_STATE_NEGOTIATING &&
+ con->state != CON_STATE_OPEN)
return 0;

- dout("try_read start on %p\n", con);
+ BUG_ON(!con->sock);

-more:
dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
con->in_base_pos);

- /*
- * process_connect and process_message drop and re-take
- * con->mutex. make sure we handle a racing close or reopen.
- */
- if (test_bit(CLOSED, &con->state) ||
- test_bit(OPENING, &con->state)) {
- ret = -EAGAIN;
+ if (con->state == CON_STATE_CONNECTING) {
+ dout("try_read connecting\n");
+ ret = read_partial_banner(con);
+ if (ret <= 0)
+ goto out;
+ ret = process_banner(con);
+ if (ret < 0)
+ goto out;
+
+ BUG_ON(con->state != CON_STATE_CONNECTING);
+ con->state = CON_STATE_NEGOTIATING;
+
+ /*
+ * Received banner is good, exchange connection info.
+ * Do not reset out_kvec, as sending our banner raced
+ * with receiving peer banner after connect completed.
+ */
+ ret = prepare_write_connect(con);
+ if (ret < 0)
+ goto out;
+ prepare_read_connect(con);
+
+ /* Send connection info before awaiting response */
goto out;
}

- if (test_bit(CONNECTING, &con->state)) {
- if (!test_bit(NEGOTIATING, &con->state)) {
- dout("try_read connecting\n");
- ret = read_partial_banner(con);
- if (ret <= 0)
- goto out;
- ret = process_banner(con);
- if (ret < 0)
- goto out;
- }
+ if (con->state == CON_STATE_NEGOTIATING) {
+ dout("try_read negotiating\n");
ret = read_partial_connect(con);
if (ret <= 0)
goto out;
@@ -1971,6 +2156,8 @@ more:
goto more;
}

+ BUG_ON(con->state != CON_STATE_OPEN);
+
if (con->in_base_pos < 0) {
/*
* skipping + discarding content.
@@ -2004,7 +2191,8 @@ more:
prepare_read_ack(con);
break;
case CEPH_MSGR_TAG_CLOSE:
- set_bit(CLOSED, &con->state); /* fixme */
+ con_close_socket(con);
+ con->state = CON_STATE_CLOSED;
goto out;
default:
goto bad_tag;
@@ -2027,6 +2215,8 @@ more:
if (con->in_tag == CEPH_MSGR_TAG_READY)
goto more;
process_message(con);
+ if (con->state == CON_STATE_OPEN)
+ prepare_read_tag(con);
goto more;
}
if (con->in_tag == CEPH_MSGR_TAG_ACK) {
@@ -2055,12 +2245,6 @@ bad_tag:
*/
static void queue_con(struct ceph_connection *con)
{
- if (test_bit(DEAD, &con->state)) {
- dout("queue_con %p ignoring: DEAD\n",
- con);
- return;
- }
-
if (!con->ops->get(con)) {
dout("queue_con %p ref count 0\n", con);
return;
@@ -2085,7 +2269,26 @@ static void con_work(struct work_struct *work)

mutex_lock(&con->mutex);
restart:
- if (test_and_clear_bit(BACKOFF, &con->state)) {
+ if (test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) {
+ switch (con->state) {
+ case CON_STATE_CONNECTING:
+ con->error_msg = "connection failed";
+ break;
+ case CON_STATE_NEGOTIATING:
+ con->error_msg = "negotiation failed";
+ break;
+ case CON_STATE_OPEN:
+ con->error_msg = "socket closed";
+ break;
+ default:
+ dout("unrecognized con state %d\n", (int)con->state);
+ con->error_msg = "unrecognized con state";
+ BUG();
+ }
+ goto fault;
+ }
+
+ if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) {
dout("con_work %p backing off\n", con);
if (queue_delayed_work(ceph_msgr_wq, &con->work,
round_jiffies_relative(con->delay))) {
@@ -2093,41 +2296,42 @@ restart:
mutex_unlock(&con->mutex);
return;
} else {
- con->ops->put(con);
dout("con_work %p FAILED to back off %lu\n", con,
con->delay);
+ set_bit(CON_FLAG_BACKOFF, &con->flags);
}
+ goto done;
}

- if (test_bit(STANDBY, &con->state)) {
+ if (con->state == CON_STATE_STANDBY) {
dout("con_work %p STANDBY\n", con);
goto done;
}
- if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
- dout("con_work CLOSED\n");
- con_close_socket(con);
+ if (con->state == CON_STATE_CLOSED) {
+ dout("con_work %p CLOSED\n", con);
+ BUG_ON(con->sock);
goto done;
}
- if (test_and_clear_bit(OPENING, &con->state)) {
- /* reopen w/ new peer */
+ if (con->state == CON_STATE_PREOPEN) {
dout("con_work OPENING\n");
- con_close_socket(con);
+ BUG_ON(con->sock);
}

- if (test_and_clear_bit(SOCK_CLOSED, &con->state))
- goto fault;
-
ret = try_read(con);
if (ret == -EAGAIN)
goto restart;
- if (ret < 0)
+ if (ret < 0) {
+ con->error_msg = "socket error on read";
goto fault;
+ }

ret = try_write(con);
if (ret == -EAGAIN)
goto restart;
- if (ret < 0)
+ if (ret < 0) {
+ con->error_msg = "socket error on write";
goto fault;
+ }

done:
mutex_unlock(&con->mutex);
@@ -2136,7 +2340,6 @@ done_unlocked:
return;

fault:
- mutex_unlock(&con->mutex);
ceph_fault(con); /* error/fault path */
goto done_unlocked;
}
@@ -2147,26 +2350,31 @@ fault:
* exponential backoff
*/
static void ceph_fault(struct ceph_connection *con)
+ __releases(con->mutex)
{
pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name),
ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg);
dout("fault %p state %lu to peer %s\n",
con, con->state, ceph_pr_addr(&con->peer_addr.in_addr));

- if (test_bit(LOSSYTX, &con->state)) {
- dout("fault on LOSSYTX channel\n");
- goto out;
- }
-
- mutex_lock(&con->mutex);
- if (test_bit(CLOSED, &con->state))
- goto out_unlock;
+ BUG_ON(con->state != CON_STATE_CONNECTING &&
+ con->state != CON_STATE_NEGOTIATING &&
+ con->state != CON_STATE_OPEN);

con_close_socket(con);

+ if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) {
+ dout("fault on LOSSYTX channel, marking CLOSED\n");
+ con->state = CON_STATE_CLOSED;
+ goto out_unlock;
+ }
+
if (con->in_msg) {
+ BUG_ON(con->in_msg->con != con);
+ con->in_msg->con = NULL;
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
+ con->ops->put(con);
}

/* Requeue anything that hasn't been acked */
@@ -2175,12 +2383,13 @@ static void ceph_fault(struct ceph_connection *con)
/* If there are no messages queued or keepalive pending, place
* the connection in a STANDBY state */
if (list_empty(&con->out_queue) &&
- !test_bit(KEEPALIVE_PENDING, &con->state)) {
+ !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) {
dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con);
- clear_bit(WRITE_PENDING, &con->state);
- set_bit(STANDBY, &con->state);
+ clear_bit(CON_FLAG_WRITE_PENDING, &con->flags);
+ con->state = CON_STATE_STANDBY;
} else {
/* retry after a delay. */
+ con->state = CON_STATE_PREOPEN;
if (con->delay == 0)
con->delay = BASE_DELAY_INTERVAL;
else if (con->delay < MAX_DELAY_INTERVAL)
@@ -2201,13 +2410,12 @@ static void ceph_fault(struct ceph_connection *con)
* that when con_work restarts we schedule the
* delay then.
*/
- set_bit(BACKOFF, &con->state);
+ set_bit(CON_FLAG_BACKOFF, &con->flags);
}
}

out_unlock:
mutex_unlock(&con->mutex);
-out:
/*
* in case we faulted due to authentication, invalidate our
* current tickets so that we can get new ones.
@@ -2224,18 +2432,14 @@ out:


/*
- * create a new messenger instance
+ * initialize a new messenger instance
*/
-struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
- u32 supported_features,
- u32 required_features)
+void ceph_messenger_init(struct ceph_messenger *msgr,
+ struct ceph_entity_addr *myaddr,
+ u32 supported_features,
+ u32 required_features,
+ bool nocrc)
{
- struct ceph_messenger *msgr;
-
- msgr = kzalloc(sizeof(*msgr), GFP_KERNEL);
- if (msgr == NULL)
- return ERR_PTR(-ENOMEM);
-
msgr->supported_features = supported_features;
msgr->required_features = required_features;

@@ -2248,30 +2452,23 @@ struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr,
msgr->inst.addr.type = 0;
get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce));
encode_my_addr(msgr);
+ msgr->nocrc = nocrc;

- dout("messenger_create %p\n", msgr);
- return msgr;
-}
-EXPORT_SYMBOL(ceph_messenger_create);
+ atomic_set(&msgr->stopping, 0);

-void ceph_messenger_destroy(struct ceph_messenger *msgr)
-{
- dout("destroy %p\n", msgr);
- kfree(msgr);
- dout("destroyed messenger %p\n", msgr);
+ dout("%s %p\n", __func__, msgr);
}
-EXPORT_SYMBOL(ceph_messenger_destroy);
+EXPORT_SYMBOL(ceph_messenger_init);

static void clear_standby(struct ceph_connection *con)
{
/* come back from STANDBY? */
- if (test_and_clear_bit(STANDBY, &con->state)) {
- mutex_lock(&con->mutex);
+ if (con->state == CON_STATE_STANDBY) {
dout("clear_standby %p and ++connect_seq\n", con);
+ con->state = CON_STATE_PREOPEN;
con->connect_seq++;
- WARN_ON(test_bit(WRITE_PENDING, &con->state));
- WARN_ON(test_bit(KEEPALIVE_PENDING, &con->state));
- mutex_unlock(&con->mutex);
+ WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags));
+ WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags));
}
}

@@ -2280,21 +2477,24 @@ static void clear_standby(struct ceph_connection *con)
*/
void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
{
- if (test_bit(CLOSED, &con->state)) {
- dout("con_send %p closed, dropping %p\n", con, msg);
- ceph_msg_put(msg);
- return;
- }
-
/* set src+dst */
msg->hdr.src = con->msgr->inst.name;
-
BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
-
msg->needs_out_seq = true;

- /* queue */
mutex_lock(&con->mutex);
+
+ if (con->state == CON_STATE_CLOSED) {
+ dout("con_send %p closed, dropping %p\n", con, msg);
+ ceph_msg_put(msg);
+ mutex_unlock(&con->mutex);
+ return;
+ }
+
+ BUG_ON(msg->con != NULL);
+ msg->con = con->ops->get(con);
+ BUG_ON(msg->con == NULL);
+
BUG_ON(!list_empty(&msg->list_head));
list_add_tail(&msg->list_head, &con->out_queue);
dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg,
@@ -2303,12 +2503,13 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
le32_to_cpu(msg->hdr.front_len),
le32_to_cpu(msg->hdr.middle_len),
le32_to_cpu(msg->hdr.data_len));
+
+ clear_standby(con);
mutex_unlock(&con->mutex);

/* if there wasn't anything waiting to send before, queue
* new work */
- clear_standby(con);
- if (test_and_set_bit(WRITE_PENDING, &con->state) == 0)
+ if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_send);
@@ -2316,24 +2517,34 @@ EXPORT_SYMBOL(ceph_con_send);
/*
* Revoke a message that was previously queued for send
*/
-void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
+void ceph_msg_revoke(struct ceph_msg *msg)
{
+ struct ceph_connection *con = msg->con;
+
+ if (!con)
+ return; /* Message not in our possession */
+
mutex_lock(&con->mutex);
if (!list_empty(&msg->list_head)) {
- dout("con_revoke %p msg %p - was on queue\n", con, msg);
+ dout("%s %p msg %p - was on queue\n", __func__, con, msg);
list_del_init(&msg->list_head);
- ceph_msg_put(msg);
+ BUG_ON(msg->con == NULL);
+ msg->con->ops->put(msg->con);
+ msg->con = NULL;
msg->hdr.seq = 0;
+
+ ceph_msg_put(msg);
}
if (con->out_msg == msg) {
- dout("con_revoke %p msg %p - was sending\n", con, msg);
+ dout("%s %p msg %p - was sending\n", __func__, con, msg);
con->out_msg = NULL;
if (con->out_kvec_is_msg) {
con->out_skip = con->out_kvec_bytes;
con->out_kvec_is_msg = false;
}
- ceph_msg_put(msg);
msg->hdr.seq = 0;
+
+ ceph_msg_put(msg);
}
mutex_unlock(&con->mutex);
}
@@ -2341,17 +2552,27 @@ void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg)
/*
* Revoke a message that we may be reading data into
*/
-void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
+void ceph_msg_revoke_incoming(struct ceph_msg *msg)
{
+ struct ceph_connection *con;
+
+ BUG_ON(msg == NULL);
+ if (!msg->con) {
+ dout("%s msg %p null con\n", __func__, msg);
+
+ return; /* Message not in our possession */
+ }
+
+ con = msg->con;
mutex_lock(&con->mutex);
- if (con->in_msg && con->in_msg == msg) {
+ if (con->in_msg == msg) {
unsigned front_len = le32_to_cpu(con->in_hdr.front_len);
unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len);
unsigned data_len = le32_to_cpu(con->in_hdr.data_len);

/* skip rest of message */
- dout("con_revoke_pages %p msg %p revoked\n", con, msg);
- con->in_base_pos = con->in_base_pos -
+ dout("%s %p msg %p revoked\n", __func__, con, msg);
+ con->in_base_pos = con->in_base_pos -
sizeof(struct ceph_msg_header) -
front_len -
middle_len -
@@ -2362,8 +2583,8 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
con->in_tag = CEPH_MSGR_TAG_READY;
con->in_seq++;
} else {
- dout("con_revoke_pages %p msg %p pages %p no-op\n",
- con, con->in_msg, msg);
+ dout("%s %p in_msg %p msg %p no-op\n",
+ __func__, con, con->in_msg, msg);
}
mutex_unlock(&con->mutex);
}
@@ -2374,9 +2595,11 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
void ceph_con_keepalive(struct ceph_connection *con)
{
dout("con_keepalive %p\n", con);
+ mutex_lock(&con->mutex);
clear_standby(con);
- if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 &&
- test_and_set_bit(WRITE_PENDING, &con->state) == 0)
+ mutex_unlock(&con->mutex);
+ if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 &&
+ test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0)
queue_con(con);
}
EXPORT_SYMBOL(ceph_con_keepalive);
@@ -2395,6 +2618,8 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
if (m == NULL)
goto out;
kref_init(&m->kref);
+
+ m->con = NULL;
INIT_LIST_HEAD(&m->list_head);

m->hdr.tid = 0;
@@ -2490,46 +2715,78 @@ static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg)
}

/*
- * Generic message allocator, for incoming messages.
+ * Allocate a message for receiving an incoming message on a
+ * connection, and save the result in con->in_msg. Uses the
+ * connection's private alloc_msg op if available.
+ *
+ * Returns 0 on success, or a negative error code.
+ *
+ * On success, if we set *skip = 1:
+ * - the next message should be skipped and ignored.
+ * - con->in_msg == NULL
+ * or if we set *skip = 0:
+ * - con->in_msg is non-null.
+ * On error (ENOMEM, EAGAIN, ...),
+ * - con->in_msg == NULL
*/
-static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con,
- struct ceph_msg_header *hdr,
- int *skip)
+static int ceph_con_in_msg_alloc(struct ceph_connection *con, int *skip)
{
+ struct ceph_msg_header *hdr = &con->in_hdr;
int type = le16_to_cpu(hdr->type);
int front_len = le32_to_cpu(hdr->front_len);
int middle_len = le32_to_cpu(hdr->middle_len);
- struct ceph_msg *msg = NULL;
- int ret;
+ int ret = 0;
+
+ BUG_ON(con->in_msg != NULL);

if (con->ops->alloc_msg) {
+ struct ceph_msg *msg;
+
mutex_unlock(&con->mutex);
msg = con->ops->alloc_msg(con, hdr, skip);
mutex_lock(&con->mutex);
- if (!msg || *skip)
- return NULL;
+ if (con->state != CON_STATE_OPEN) {
+ if (msg)
+ ceph_msg_put(msg);
+ return -EAGAIN;
+ }
+ con->in_msg = msg;
+ if (con->in_msg) {
+ con->in_msg->con = con->ops->get(con);
+ BUG_ON(con->in_msg->con == NULL);
+ }
+ if (*skip) {
+ con->in_msg = NULL;
+ return 0;
+ }
+ if (!con->in_msg) {
+ con->error_msg =
+ "error allocating memory for incoming message";
+ return -ENOMEM;
+ }
}
- if (!msg) {
- *skip = 0;
- msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
- if (!msg) {
+ if (!con->in_msg) {
+ con->in_msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
+ if (!con->in_msg) {
pr_err("unable to allocate msg type %d len %d\n",
type, front_len);
- return NULL;
+ return -ENOMEM;
}
- msg->page_alignment = le16_to_cpu(hdr->data_off);
+ con->in_msg->con = con->ops->get(con);
+ BUG_ON(con->in_msg->con == NULL);
+ con->in_msg->page_alignment = le16_to_cpu(hdr->data_off);
}
- memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr));
+ memcpy(&con->in_msg->hdr, &con->in_hdr, sizeof(con->in_hdr));

- if (middle_len && !msg->middle) {
- ret = ceph_alloc_middle(con, msg);
+ if (middle_len && !con->in_msg->middle) {
+ ret = ceph_alloc_middle(con, con->in_msg);
if (ret < 0) {
- ceph_msg_put(msg);
- return NULL;
+ ceph_msg_put(con->in_msg);
+ con->in_msg = NULL;
}
}

- return msg;
+ return ret;
}


diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
index 1845cde..89a6409 100644
--- a/net/ceph/mon_client.c
+++ b/net/ceph/mon_client.c
@@ -106,9 +106,9 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
monc->pending_auth = 1;
monc->m_auth->front.iov_len = len;
monc->m_auth->hdr.front_len = cpu_to_le32(len);
- ceph_con_revoke(monc->con, monc->m_auth);
+ ceph_msg_revoke(monc->m_auth);
ceph_msg_get(monc->m_auth); /* keep our ref */
- ceph_con_send(monc->con, monc->m_auth);
+ ceph_con_send(&monc->con, monc->m_auth);
}

/*
@@ -117,8 +117,11 @@ static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len)
static void __close_session(struct ceph_mon_client *monc)
{
dout("__close_session closing mon%d\n", monc->cur_mon);
- ceph_con_revoke(monc->con, monc->m_auth);
- ceph_con_close(monc->con);
+ ceph_msg_revoke(monc->m_auth);
+ ceph_msg_revoke_incoming(monc->m_auth_reply);
+ ceph_msg_revoke(monc->m_subscribe);
+ ceph_msg_revoke_incoming(monc->m_subscribe_ack);
+ ceph_con_close(&monc->con);
monc->cur_mon = -1;
monc->pending_auth = 0;
ceph_auth_reset(monc->auth);
@@ -142,9 +145,8 @@ static int __open_session(struct ceph_mon_client *monc)
monc->want_next_osdmap = !!monc->want_next_osdmap;

dout("open_session mon%d opening\n", monc->cur_mon);
- monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON;
- monc->con->peer_name.num = cpu_to_le64(monc->cur_mon);
- ceph_con_open(monc->con,
+ ceph_con_open(&monc->con,
+ CEPH_ENTITY_TYPE_MON, monc->cur_mon,
&monc->monmap->mon_inst[monc->cur_mon].addr);

/* initiatiate authentication handshake */
@@ -226,8 +228,8 @@ static void __send_subscribe(struct ceph_mon_client *monc)

msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
- ceph_con_revoke(monc->con, msg);
- ceph_con_send(monc->con, ceph_msg_get(msg));
+ ceph_msg_revoke(msg);
+ ceph_con_send(&monc->con, ceph_msg_get(msg));

monc->sub_sent = jiffies | 1; /* never 0 */
}
@@ -247,7 +249,7 @@ static void handle_subscribe_ack(struct ceph_mon_client *monc,
if (monc->hunting) {
pr_info("mon%d %s session established\n",
monc->cur_mon,
- ceph_pr_addr(&monc->con->peer_addr.in_addr));
+ ceph_pr_addr(&monc->con.peer_addr.in_addr));
monc->hunting = false;
}
dout("handle_subscribe_ack after %d seconds\n", seconds);
@@ -309,6 +311,17 @@ int ceph_monc_open_session(struct ceph_mon_client *monc)
EXPORT_SYMBOL(ceph_monc_open_session);

/*
+ * We require the fsid and global_id in order to initialize our
+ * debugfs dir.
+ */
+static bool have_debugfs_info(struct ceph_mon_client *monc)
+{
+ dout("have_debugfs_info fsid %d globalid %lld\n",
+ (int)monc->client->have_fsid, monc->auth->global_id);
+ return monc->client->have_fsid && monc->auth->global_id > 0;
+}
+
+/*
* The monitor responds with mount ack indicate mount success. The
* included client ticket allows the client to talk to MDSs and OSDs.
*/
@@ -318,9 +331,12 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
struct ceph_client *client = monc->client;
struct ceph_monmap *monmap = NULL, *old = monc->monmap;
void *p, *end;
+ int had_debugfs_info, init_debugfs = 0;

mutex_lock(&monc->mutex);

+ had_debugfs_info = have_debugfs_info(monc);
+
dout("handle_monmap\n");
p = msg->front.iov_base;
end = p + msg->front.iov_len;
@@ -342,12 +358,22 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,

if (!client->have_fsid) {
client->have_fsid = true;
+ if (!had_debugfs_info && have_debugfs_info(monc)) {
+ pr_info("client%lld fsid %pU\n",
+ ceph_client_id(monc->client),
+ &monc->client->fsid);
+ init_debugfs = 1;
+ }
mutex_unlock(&monc->mutex);
- /*
- * do debugfs initialization without mutex to avoid
- * creating a locking dependency
- */
- ceph_debugfs_client_init(client);
+
+ if (init_debugfs) {
+ /*
+ * do debugfs initialization without mutex to avoid
+ * creating a locking dependency
+ */
+ ceph_debugfs_client_init(monc->client);
+ }
+
goto out_unlocked;
}
out:
@@ -439,6 +465,7 @@ static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
m = NULL;
} else {
dout("get_generic_reply %lld got %p\n", tid, req->reply);
+ *skip = 0;
m = ceph_msg_get(req->reply);
/*
* we don't need to track the connection reading into
@@ -461,7 +488,7 @@ static int do_generic_request(struct ceph_mon_client *monc,
req->request->hdr.tid = cpu_to_le64(req->tid);
__insert_generic_request(monc, req);
monc->num_generic_requests++;
- ceph_con_send(monc->con, ceph_msg_get(req->request));
+ ceph_con_send(&monc->con, ceph_msg_get(req->request));
mutex_unlock(&monc->mutex);

err = wait_for_completion_interruptible(&req->completion);
@@ -684,8 +711,9 @@ static void __resend_generic_request(struct ceph_mon_client *monc)

for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) {
req = rb_entry(p, struct ceph_mon_generic_request, node);
- ceph_con_revoke(monc->con, req->request);
- ceph_con_send(monc->con, ceph_msg_get(req->request));
+ ceph_msg_revoke(req->request);
+ ceph_msg_revoke_incoming(req->reply);
+ ceph_con_send(&monc->con, ceph_msg_get(req->request));
}
}

@@ -705,7 +733,7 @@ static void delayed_work(struct work_struct *work)
__close_session(monc);
__open_session(monc); /* continue hunting */
} else {
- ceph_con_keepalive(monc->con);
+ ceph_con_keepalive(&monc->con);

__validate_auth(monc);

@@ -760,19 +788,12 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
goto out;

/* connection */
- monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
- if (!monc->con)
- goto out_monmap;
- ceph_con_init(monc->client->msgr, monc->con);
- monc->con->private = monc;
- monc->con->ops = &mon_con_ops;
-
/* authentication */
monc->auth = ceph_auth_init(cl->options->name,
cl->options->key);
if (IS_ERR(monc->auth)) {
err = PTR_ERR(monc->auth);
- goto out_con;
+ goto out_monmap;
}
monc->auth->want_keys =
CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON |
@@ -801,6 +822,9 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
if (!monc->m_auth)
goto out_auth_reply;

+ ceph_con_init(&monc->con, monc, &mon_con_ops,
+ &monc->client->msgr);
+
monc->cur_mon = -1;
monc->hunting = true;
monc->sub_renew_after = jiffies;
@@ -824,8 +848,6 @@ out_subscribe_ack:
ceph_msg_put(monc->m_subscribe_ack);
out_auth:
ceph_auth_destroy(monc->auth);
-out_con:
- monc->con->ops->put(monc->con);
out_monmap:
kfree(monc->monmap);
out:
@@ -841,12 +863,16 @@ void ceph_monc_stop(struct ceph_mon_client *monc)
mutex_lock(&monc->mutex);
__close_session(monc);

- monc->con->private = NULL;
- monc->con->ops->put(monc->con);
- monc->con = NULL;
-
mutex_unlock(&monc->mutex);

+ /*
+ * flush msgr queue before we destroy ourselves to ensure that:
+ * - any work that references our embedded con is finished.
+ * - any osd_client or other work that may reference an authorizer
+ * finishes before we shut down the auth subsystem.
+ */
+ ceph_msgr_flush();
+
ceph_auth_destroy(monc->auth);

ceph_msg_put(monc->m_auth);
@@ -863,8 +889,10 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
{
int ret;
int was_auth = 0;
+ int had_debugfs_info, init_debugfs = 0;

mutex_lock(&monc->mutex);
+ had_debugfs_info = have_debugfs_info(monc);
if (monc->auth->ops)
was_auth = monc->auth->ops->is_authenticated(monc->auth);
monc->pending_auth = 0;
@@ -880,14 +908,29 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
} else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");

- monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
- monc->client->msgr->inst.name.num =
+ monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
+ monc->client->msgr.inst.name.num =
cpu_to_le64(monc->auth->global_id);

__send_subscribe(monc);
__resend_generic_request(monc);
}
+
+ if (!had_debugfs_info && have_debugfs_info(monc)) {
+ pr_info("client%lld fsid %pU\n",
+ ceph_client_id(monc->client),
+ &monc->client->fsid);
+ init_debugfs = 1;
+ }
mutex_unlock(&monc->mutex);
+
+ if (init_debugfs) {
+ /*
+ * do debugfs initialization without mutex to avoid
+ * creating a locking dependency
+ */
+ ceph_debugfs_client_init(monc->client);
+ }
}

static int __validate_auth(struct ceph_mon_client *monc)
@@ -992,6 +1035,8 @@ static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
case CEPH_MSG_MDS_MAP:
case CEPH_MSG_OSD_MAP:
m = ceph_msg_new(type, front_len, GFP_NOFS, false);
+ if (!m)
+ return NULL; /* ENOMEM--return skip == 0 */
break;
}

@@ -1021,7 +1066,7 @@ static void mon_fault(struct ceph_connection *con)
if (!monc->hunting)
pr_info("mon%d %s session lost, "
"hunting for new mon\n", monc->cur_mon,
- ceph_pr_addr(&monc->con->peer_addr.in_addr));
+ ceph_pr_addr(&monc->con.peer_addr.in_addr));

__close_session(monc);
if (!monc->hunting) {
@@ -1036,9 +1081,23 @@ out:
mutex_unlock(&monc->mutex);
}

+/*
+ * We can ignore refcounting on the connection struct, as all references
+ * will come from the messenger workqueue, which is drained prior to
+ * mon_client destruction.
+ */
+static struct ceph_connection *con_get(struct ceph_connection *con)
+{
+ return con;
+}
+
+static void con_put(struct ceph_connection *con)
+{
+}
+
static const struct ceph_connection_operations mon_con_ops = {
- .get = ceph_con_get,
- .put = ceph_con_put,
+ .get = con_get,
+ .put = con_put,
.dispatch = dispatch,
.fault = mon_fault,
.alloc_msg = mon_alloc_msg,
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index 11d5f41..ddec1c1 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -12,7 +12,7 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
struct ceph_msgpool *pool = arg;
struct ceph_msg *msg;

- msg = ceph_msg_new(0, pool->front_len, gfp_mask, true);
+ msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
if (!msg) {
dout("msgpool_alloc %s failed\n", pool->name);
} else {
@@ -32,10 +32,11 @@ static void msgpool_free(void *element, void *arg)
ceph_msg_put(msg);
}

-int ceph_msgpool_init(struct ceph_msgpool *pool,
+int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
int front_len, int size, bool blocking, const char *name)
{
dout("msgpool %s init\n", name);
+ pool->type = type;
pool->front_len = front_len;
pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
if (!pool->pool)
@@ -61,7 +62,7 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
WARN_ON(1);

/* try to alloc a fresh message */
- return ceph_msg_new(0, front_len, GFP_NOFS, false);
+ return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
}

msg = mempool_alloc(pool->pool, GFP_NOFS);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 5e25405..a79dbae 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -52,7 +52,7 @@ static int op_has_extent(int op)
op == CEPH_OSD_OP_WRITE);
}

-void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
+int ceph_calc_raw_layout(struct ceph_osd_client *osdc,
struct ceph_file_layout *layout,
u64 snapid,
u64 off, u64 *plen, u64 *bno,
@@ -62,12 +62,15 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,
struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
u64 orig_len = *plen;
u64 objoff, objlen; /* extent in object */
+ int r;

reqhead->snapid = cpu_to_le64(snapid);

/* object extent? */
- ceph_calc_file_object_mapping(layout, off, plen, bno,
- &objoff, &objlen);
+ r = ceph_calc_file_object_mapping(layout, off, plen, bno,
+ &objoff, &objlen);
+ if (r < 0)
+ return r;
if (*plen < orig_len)
dout(" skipping last %llu, final file extent %llu~%llu\n",
orig_len - *plen, off, *plen);
@@ -83,7 +86,7 @@ void ceph_calc_raw_layout(struct ceph_osd_client *osdc,

dout("calc_layout bno=%llx %llu~%llu (%d pages)\n",
*bno, objoff, objlen, req->r_num_pages);
-
+ return 0;
}
EXPORT_SYMBOL(ceph_calc_raw_layout);

@@ -112,20 +115,25 @@ EXPORT_SYMBOL(ceph_calc_raw_layout);
*
* fill osd op in request message.
*/
-static void calc_layout(struct ceph_osd_client *osdc,
- struct ceph_vino vino,
- struct ceph_file_layout *layout,
- u64 off, u64 *plen,
- struct ceph_osd_request *req,
- struct ceph_osd_req_op *op)
+static int calc_layout(struct ceph_osd_client *osdc,
+ struct ceph_vino vino,
+ struct ceph_file_layout *layout,
+ u64 off, u64 *plen,
+ struct ceph_osd_request *req,
+ struct ceph_osd_req_op *op)
{
u64 bno;
+ int r;

- ceph_calc_raw_layout(osdc, layout, vino.snap, off,
- plen, &bno, req, op);
+ r = ceph_calc_raw_layout(osdc, layout, vino.snap, off,
+ plen, &bno, req, op);
+ if (r < 0)
+ return r;

snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
req->r_oid_len = strlen(req->r_oid);
+
+ return r;
}

/*
@@ -139,15 +147,14 @@ void ceph_osdc_release_request(struct kref *kref)

if (req->r_request)
ceph_msg_put(req->r_request);
- if (req->r_reply)
- ceph_msg_put(req->r_reply);
if (req->r_con_filling_msg) {
- dout("release_request revoking pages %p from con %p\n",
+ dout("%s revoking pages %p from con %p\n", __func__,
req->r_pages, req->r_con_filling_msg);
- ceph_con_revoke_message(req->r_con_filling_msg,
- req->r_reply);
- ceph_con_put(req->r_con_filling_msg);
+ ceph_msg_revoke_incoming(req->r_reply);
+ req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
}
+ if (req->r_reply)
+ ceph_msg_put(req->r_reply);
if (req->r_own_pages)
ceph_release_page_vector(req->r_pages,
req->r_num_pages);
@@ -243,6 +250,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
}
ceph_pagelist_init(req->r_trail);
}
+
/* create request message; allow space for oid */
msg_size += MAX_OBJ_NAME_SIZE;
if (snapc)
@@ -256,7 +264,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
return NULL;
}

- msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP);
memset(msg->front.iov_base, 0, msg->front.iov_len);

req->r_request = msg;
@@ -278,7 +285,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req,
{
dst->op = cpu_to_le16(src->op);

- switch (dst->op) {
+ switch (src->op) {
case CEPH_OSD_OP_READ:
case CEPH_OSD_OP_WRITE:
dst->extent.offset =
@@ -624,7 +631,7 @@ static void osd_reset(struct ceph_connection *con)
/*
* Track open sessions with osds.
*/
-static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
+static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
{
struct ceph_osd *osd;

@@ -634,15 +641,13 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)

atomic_set(&osd->o_ref, 1);
osd->o_osdc = osdc;
+ osd->o_osd = onum;
INIT_LIST_HEAD(&osd->o_requests);
INIT_LIST_HEAD(&osd->o_linger_requests);
INIT_LIST_HEAD(&osd->o_osd_lru);
osd->o_incarnation = 1;

- ceph_con_init(osdc->client->msgr, &osd->o_con);
- osd->o_con.private = osd;
- osd->o_con.ops = &osd_con_ops;
- osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;
+ ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);

INIT_LIST_HEAD(&osd->o_keepalive_item);
return osd;
@@ -664,11 +669,11 @@ static void put_osd(struct ceph_osd *osd)
{
dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
atomic_read(&osd->o_ref) - 1);
- if (atomic_dec_and_test(&osd->o_ref)) {
+ if (atomic_dec_and_test(&osd->o_ref) && osd->o_auth.authorizer) {
struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;

- if (osd->o_authorizer)
- ac->ops->destroy_authorizer(ac, osd->o_authorizer);
+ if (ac->ops && ac->ops->destroy_authorizer)
+ ac->ops->destroy_authorizer(ac, osd->o_auth.authorizer);
kfree(osd);
}
}
@@ -752,7 +757,8 @@ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
ret = -EAGAIN;
} else {
ceph_con_close(&osd->o_con);
- ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]);
+ ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd,
+ &osdc->osdmap->osd_addr[osd->o_osd]);
osd->o_incarnation++;
}
return ret;
@@ -841,13 +847,19 @@ static void register_request(struct ceph_osd_client *osdc,
static void __unregister_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req)
{
+ if (RB_EMPTY_NODE(&req->r_node)) {
+ dout("__unregister_request %p tid %lld not registered\n",
+ req, req->r_tid);
+ return;
+ }
+
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
rb_erase(&req->r_node, &osdc->requests);
osdc->num_requests--;

if (req->r_osd) {
/* make sure the original request isn't in flight. */
- ceph_con_revoke(&req->r_osd->o_con, req->r_request);
+ ceph_msg_revoke(req->r_request);

list_del_init(&req->r_osd_item);
if (list_empty(&req->r_osd->o_requests) &&
@@ -874,7 +886,7 @@ static void __unregister_request(struct ceph_osd_client *osdc,
static void __cancel_request(struct ceph_osd_request *req)
{
if (req->r_sent && req->r_osd) {
- ceph_con_revoke(&req->r_osd->o_con, req->r_request);
+ ceph_msg_revoke(req->r_request);
req->r_sent = 0;
}
}
@@ -884,7 +896,9 @@ static void __register_linger_request(struct ceph_osd_client *osdc,
{
dout("__register_linger_request %p\n", req);
list_add_tail(&req->r_linger_item, &osdc->req_linger);
- list_add_tail(&req->r_linger_osd, &req->r_osd->o_linger_requests);
+ if (req->r_osd)
+ list_add_tail(&req->r_linger_osd,
+ &req->r_osd->o_linger_requests);
}

static void __unregister_linger_request(struct ceph_osd_client *osdc,
@@ -992,18 +1006,18 @@ static int __map_request(struct ceph_osd_client *osdc,
req->r_osd = __lookup_osd(osdc, o);
if (!req->r_osd && o >= 0) {
err = -ENOMEM;
- req->r_osd = create_osd(osdc);
+ req->r_osd = create_osd(osdc, o);
if (!req->r_osd) {
list_move(&req->r_req_lru_item, &osdc->req_notarget);
goto out;
}

dout("map_request osd %p is osd%d\n", req->r_osd, o);
- req->r_osd->o_osd = o;
- req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
__insert_osd(osdc, req->r_osd);

- ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
+ ceph_con_open(&req->r_osd->o_con,
+ CEPH_ENTITY_TYPE_OSD, o,
+ &osdc->osdmap->osd_addr[o]);
}

if (req->r_osd) {
@@ -1210,7 +1224,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
if (req->r_con_filling_msg == con && req->r_reply == msg) {
dout(" dropping con_filling_msg ref %p\n", con);
req->r_con_filling_msg = NULL;
- ceph_con_put(con);
+ con->ops->put(con);
}

if (!req->r_got_reply) {
@@ -1298,8 +1312,9 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)

dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
mutex_lock(&osdc->request_mutex);
- for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
+ for (p = rb_first(&osdc->requests); p; ) {
req = rb_entry(p, struct ceph_osd_request, r_node);
+ p = rb_next(p);
err = __map_request(osdc, req, force_resend);
if (err < 0)
continue; /* error */
@@ -1307,10 +1322,23 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
dout("%p tid %llu maps to no osd\n", req, req->r_tid);
needmap++; /* request a newer map */
} else if (err > 0) {
- dout("%p tid %llu requeued on osd%d\n", req, req->r_tid,
- req->r_osd ? req->r_osd->o_osd : -1);
- if (!req->r_linger)
+ if (!req->r_linger) {
+ dout("%p tid %llu requeued on osd%d\n", req,
+ req->r_tid,
+ req->r_osd ? req->r_osd->o_osd : -1);
req->r_flags |= CEPH_OSD_FLAG_RETRY;
+ }
+ }
+ if (req->r_linger && list_empty(&req->r_linger_item)) {
+ /*
+ * register as a linger so that we will
+ * re-submit below and get a new tid
+ */
+ dout("%p tid %llu restart on osd%d\n",
+ req, req->r_tid,
+ req->r_osd ? req->r_osd->o_osd : -1);
+ __register_linger_request(osdc, req);
+ __unregister_request(osdc, req);
}
}

@@ -1385,7 +1413,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
epoch, maplen);
newmap = osdmap_apply_incremental(&p, next,
osdc->osdmap,
- osdc->client->msgr);
+ &osdc->client->msgr);
if (IS_ERR(newmap)) {
err = PTR_ERR(newmap);
goto bad;
@@ -1833,11 +1861,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
if (!osdc->req_mempool)
goto out;

- err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true,
+ err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
+ OSD_OP_FRONT_LEN, 10, true,
"osd_op");
if (err < 0)
goto out_mempool;
- err = ceph_msgpool_init(&osdc->msgpool_op_reply,
+ err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
OSD_OPREPLY_FRONT_LEN, 10, true,
"osd_op_reply");
if (err < 0)
@@ -2019,10 +2048,10 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
}

if (req->r_con_filling_msg) {
- dout("get_reply revoking msg %p from old con %p\n",
+ dout("%s revoking msg %p from old con %p\n", __func__,
req->r_reply, req->r_con_filling_msg);
- ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply);
- ceph_con_put(req->r_con_filling_msg);
+ ceph_msg_revoke_incoming(req->r_reply);
+ req->r_con_filling_msg->ops->put(req->r_con_filling_msg);
req->r_con_filling_msg = NULL;
}

@@ -2057,7 +2086,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
#endif
}
*skip = 0;
- req->r_con_filling_msg = ceph_con_get(con);
+ req->r_con_filling_msg = con->ops->get(con);
dout("get_reply tid %lld %p\n", tid, m);

out:
@@ -2074,6 +2103,7 @@ static struct ceph_msg *alloc_msg(struct ceph_connection *con,
int type = le16_to_cpu(hdr->type);
int front = le32_to_cpu(hdr->front_len);

+ *skip = 0;
switch (type) {
case CEPH_MSG_OSD_MAP:
case CEPH_MSG_WATCH_NOTIFY:
@@ -2108,37 +2138,32 @@ static void put_osd_con(struct ceph_connection *con)
/*
* authentication
*/
-static int get_authorizer(struct ceph_connection *con,
- void **buf, int *len, int *proto,
- void **reply_buf, int *reply_len, int force_new)
+/*
+ * Note: returned pointer is the address of a structure that's
+ * managed separately. Caller must *not* attempt to free it.
+ */
+static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
+ int *proto, int force_new)
{
struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
- int ret = 0;
+ struct ceph_auth_handshake *auth = &o->o_auth;

- if (force_new && o->o_authorizer) {
- ac->ops->destroy_authorizer(ac, o->o_authorizer);
- o->o_authorizer = NULL;
- }
- if (o->o_authorizer == NULL) {
- ret = ac->ops->create_authorizer(
- ac, CEPH_ENTITY_TYPE_OSD,
- &o->o_authorizer,
- &o->o_authorizer_buf,
- &o->o_authorizer_buf_len,
- &o->o_authorizer_reply_buf,
- &o->o_authorizer_reply_buf_len);
+ if (force_new && auth->authorizer) {
+ if (ac->ops && ac->ops->destroy_authorizer)
+ ac->ops->destroy_authorizer(ac, auth->authorizer);
+ auth->authorizer = NULL;
+ }
+ if (!auth->authorizer && ac->ops && ac->ops->create_authorizer) {
+ int ret = ac->ops->create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
+ auth);
if (ret)
- return ret;
+ return ERR_PTR(ret);
}
-
*proto = ac->protocol;
- *buf = o->o_authorizer_buf;
- *len = o->o_authorizer_buf_len;
- *reply_buf = o->o_authorizer_reply_buf;
- *reply_len = o->o_authorizer_reply_buf_len;
- return 0;
+
+ return auth;
}


@@ -2148,7 +2173,11 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len)
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;

- return ac->ops->verify_authorizer_reply(ac, o->o_authorizer, len);
+ /*
+ * XXX If ac->ops or ac->ops->verify_authorizer_reply is null,
+ * XXX which do we do: succeed or fail?
+ */
+ return ac->ops->verify_authorizer_reply(ac, o->o_auth.authorizer, len);
}

static int invalidate_authorizer(struct ceph_connection *con)
@@ -2157,7 +2186,7 @@ static int invalidate_authorizer(struct ceph_connection *con)
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;

- if (ac->ops->invalidate_authorizer)
+ if (ac->ops && ac->ops->invalidate_authorizer)
ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD);

return ceph_monc_validate_auth(&osdc->client->monc);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 29ad46e..430076e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -495,15 +495,16 @@ static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
ceph_decode_32_safe(p, end, pool, bad);
ceph_decode_32_safe(p, end, len, bad);
dout(" pool %d len %d\n", pool, len);
+ ceph_decode_need(p, end, len, bad);
pi = __lookup_pg_pool(&map->pg_pools, pool);
if (pi) {
+ char *name = kstrndup(*p, len, GFP_NOFS);
+
+ if (!name)
+ return -ENOMEM;
kfree(pi->name);
- pi->name = kmalloc(len + 1, GFP_NOFS);
- if (pi->name) {
- memcpy(pi->name, *p, len);
- pi->name[len] = '\0';
- dout(" name is %s\n", pi->name);
- }
+ pi->name = name;
+ dout(" name is %s\n", pi->name);
}
*p += len;
}
@@ -673,6 +674,9 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end)
ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad);
ceph_decode_copy(p, &pgid, sizeof(pgid));
n = ceph_decode_32(p);
+ err = -EINVAL;
+ if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32))
+ goto bad;
ceph_decode_need(p, end, n * sizeof(u32), bad);
err = -ENOMEM;
pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS);
@@ -890,8 +894,16 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
pglen = ceph_decode_32(p);

if (pglen) {
- /* insert */
ceph_decode_need(p, end, pglen*sizeof(u32), bad);
+
+ /* removing existing (if any) */
+ (void) __remove_pg_mapping(&map->pg_temp, pgid);
+
+ /* insert */
+ if (pglen > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) {
+ err = -EINVAL;
+ goto bad;
+ }
pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS);
if (!pg) {
err = -ENOMEM;
@@ -940,7 +952,7 @@ bad:
* for now, we write only a single su, until we can
* pass a stride back to the caller.
*/
-void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
+int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
u64 off, u64 *plen,
u64 *ono,
u64 *oxoff, u64 *oxlen)
@@ -954,11 +966,17 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,

dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen,
osize, su);
+ if (su == 0 || sc == 0)
+ goto invalid;
su_per_object = osize / su;
+ if (su_per_object == 0)
+ goto invalid;
dout("osize %u / su %u = su_per_object %u\n", osize, su,
su_per_object);

- BUG_ON((su & ~PAGE_MASK) != 0);
+ if ((su & ~PAGE_MASK) != 0)
+ goto invalid;
+
/* bl = *off / su; */
t = off;
do_div(t, su);
@@ -986,6 +1004,14 @@ void ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
*plen = *oxlen;

dout(" obj extent %llu~%llu\n", *oxoff, *oxlen);
+ return 0;
+
+invalid:
+ dout(" invalid layout\n");
+ *ono = 0;
+ *oxoff = 0;
+ *oxlen = 0;
+ return -EINVAL;
}
EXPORT_SYMBOL(ceph_calc_file_object_mapping);

diff --git a/net/core/dev.c b/net/core/dev.c
index 24a21f3..eb858dc 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2763,8 +2763,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
if (unlikely(tcpu != next_cpu) &&
(tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
((int)(per_cpu(softnet_data, tcpu).input_queue_head -
- rflow->last_qtail)) >= 0))
+ rflow->last_qtail)) >= 0)) {
+ tcpu = next_cpu;
rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
+ }

if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
*rflowp = rflow;
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 626698f..76f6d0b 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -308,7 +308,8 @@ int dev_addr_del(struct net_device *dev, unsigned char *addr,
*/
ha = list_first_entry(&dev->dev_addrs.list,
struct netdev_hw_addr, list);
- if (ha->addr == dev->dev_addr && ha->refcount == 1)
+ if (!memcmp(ha->addr, addr, dev->addr_len) &&
+ ha->type == addr_type && ha->refcount == 1)
return -ENOENT;

err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 2fd0fba..59ef40a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -456,19 +456,28 @@ static int do_ip_setsockopt(struct sock *sk, int level,
struct inet_sock *inet = inet_sk(sk);
int val = 0, err;

- if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
- (1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
- (1<<IP_RETOPTS) | (1<<IP_TOS) |
- (1<<IP_TTL) | (1<<IP_HDRINCL) |
- (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
- (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) |
- (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT) |
- (1<<IP_MINTTL) | (1<<IP_NODEFRAG))) ||
- optname == IP_UNICAST_IF ||
- optname == IP_MULTICAST_TTL ||
- optname == IP_MULTICAST_ALL ||
- optname == IP_MULTICAST_LOOP ||
- optname == IP_RECVORIGDSTADDR) {
+ switch (optname) {
+ case IP_PKTINFO:
+ case IP_RECVTTL:
+ case IP_RECVOPTS:
+ case IP_RECVTOS:
+ case IP_RETOPTS:
+ case IP_TOS:
+ case IP_TTL:
+ case IP_HDRINCL:
+ case IP_MTU_DISCOVER:
+ case IP_RECVERR:
+ case IP_ROUTER_ALERT:
+ case IP_FREEBIND:
+ case IP_PASSSEC:
+ case IP_TRANSPARENT:
+ case IP_MINTTL:
+ case IP_NODEFRAG:
+ case IP_UNICAST_IF:
+ case IP_MULTICAST_TTL:
+ case IP_MULTICAST_ALL:
+ case IP_MULTICAST_LOOP:
+ case IP_RECVORIGDSTADDR:
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 3828a42..da4098f 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -194,7 +194,8 @@ nf_nat_out(unsigned int hooknum,

if ((ct->tuplehash[dir].tuple.src.u3.ip !=
ct->tuplehash[!dir].tuple.dst.u3.ip) ||
- (ct->tuplehash[dir].tuple.src.u.all !=
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.src.u.all !=
ct->tuplehash[!dir].tuple.dst.u.all)
)
return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
@@ -230,7 +231,8 @@ nf_nat_local_fn(unsigned int hooknum,
ret = NF_DROP;
}
#ifdef CONFIG_XFRM
- else if (ct->tuplehash[dir].tuple.dst.u.all !=
+ else if (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all)
if (ip_xfrm_me_harder(skb))
ret = NF_DROP;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 63dd1f8..34c1109 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -828,6 +828,7 @@ pref_skip_coa:
if (val < 0 || val > 255)
goto e_inval;
np->min_hopcount = val;
+ retv = 0;
break;
case IPV6_DONTFRAG:
np->dontfrag = valbool;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index db8fae5..498e87b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1297,6 +1297,8 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
struct net_device *dev);
netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
struct net_device *dev);
+void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
+ struct sk_buff_head *skbs);

/* HT */
bool ieee80111_cfg_override_disables_ht40(struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index d93d39b..6d25d77 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -738,8 +738,8 @@ int __must_check __sta_info_destroy(struct sta_info *sta)

for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]);
- __skb_queue_purge(&sta->ps_tx_buf[ac]);
- __skb_queue_purge(&sta->tx_filtered[ac]);
+ ieee80211_purge_tx_queue(&local->hw, &sta->ps_tx_buf[ac]);
+ ieee80211_purge_tx_queue(&local->hw, &sta->tx_filtered[ac]);
}

#ifdef CONFIG_MAC80211_MESH
@@ -774,7 +774,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]);
if (!tid_tx)
continue;
- __skb_queue_purge(&tid_tx->pending);
+ ieee80211_purge_tx_queue(&local->hw, &tid_tx->pending);
kfree(tid_tx);
}

@@ -959,6 +959,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
struct ieee80211_local *local = sdata->local;
struct sk_buff_head pending;
int filtered = 0, buffered = 0, ac;
+ unsigned long flags;

clear_sta_flag(sta, WLAN_STA_SP);

@@ -974,12 +975,16 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
int count = skb_queue_len(&pending), tmp;

+ spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags);
skb_queue_splice_tail_init(&sta->tx_filtered[ac], &pending);
+ spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags);
tmp = skb_queue_len(&pending);
filtered += tmp - count;
count = tmp;

+ spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags);
skb_queue_splice_tail_init(&sta->ps_tx_buf[ac], &pending);
+ spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags);
tmp = skb_queue_len(&pending);
buffered += tmp - count;
}
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 5f8f89e..47b117f 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -660,3 +660,12 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb)
dev_kfree_skb_any(skb);
}
EXPORT_SYMBOL(ieee80211_free_txskb);
+
+void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
+ struct sk_buff_head *skbs)
+{
+ struct sk_buff *skb;
+
+ while ((skb = __skb_dequeue(skbs)))
+ ieee80211_free_txskb(hw, skb);
+}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e76facc..eace766 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1357,7 +1357,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
if (tx->skb)
dev_kfree_skb(tx->skb);
else
- __skb_queue_purge(&tx->skbs);
+ ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs);
return -1;
} else if (unlikely(res == TX_QUEUED)) {
I802_DEBUG_INC(tx->local->tx_handlers_queued);
@@ -2126,10 +2126,13 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
*/
void ieee80211_clear_tx_pending(struct ieee80211_local *local)
{
+ struct sk_buff *skb;
int i;

- for (i = 0; i < local->hw.queues; i++)
- skb_queue_purge(&local->pending[i]);
+ for (i = 0; i < local->hw.queues; i++) {
+ while ((skb = skb_dequeue(&local->pending[i])) != NULL)
+ ieee80211_free_txskb(&local->hw, skb);
+ }
}

/*
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 266d092..73ef163 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1341,6 +1341,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
list_for_each_entry(sdata, &local->interfaces, list) {
if (sdata->vif.type != NL80211_IFTYPE_STATION)
continue;
+ if (!sdata->u.mgd.associated)
+ continue;

ieee80211_send_nullfunc(local, sdata, 0);
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 0d07a1d..e022123 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -158,21 +158,18 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sCL -> sSS
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
-/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
+/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
/*
* sNO -> sIV Too late and no reason to do anything
* sSS -> sIV Client can't send SYN and then SYN/ACK
* sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
- * sSR -> sIG
- * sES -> sIG Error: SYNs in window outside the SYN_SENT state
- * are errors. Receiver will reply with RST
- * and close the connection.
- * Or we are not in sync and hold a dead connection.
- * sFW -> sIG
- * sCW -> sIG
- * sLA -> sIG
- * sTW -> sIG
- * sCL -> sIG
+ * sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
+ * sES -> sIV Invalid SYN/ACK packets sent by the client
+ * sFW -> sIV
+ * sCW -> sIV
+ * sLA -> sIV
+ * sTW -> sIV
+ * sCL -> sIV
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
@@ -627,15 +624,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
ack = sack = receiver->td_end;
}

- if (seq == end
- && (!tcph->rst
- || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
+ if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
/*
- * Packets contains no data: we assume it is valid
- * and check the ack value only.
- * However RST segments are always validated by their
- * SEQ number, except when seq == 0 (reset sent answering
- * SYN.
+ * RST sent answering SYN.
*/
seq = end = sender->td_end;

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index b01449f..4dc8347 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -134,9 +134,8 @@ static const struct ieee80211_regdomain world_regdom = {
.reg_rules = {
/* IEEE 802.11b/g, channels 1..11 */
REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
- /* IEEE 802.11b/g, channels 12..13. No HT40
- * channel fits here. */
- REG_RULE(2467-10, 2472+10, 20, 6, 20,
+ /* IEEE 802.11b/g, channels 12..13. */
+ REG_RULE(2467-10, 2472+10, 40, 6, 20,
NL80211_RRF_PASSIVE_SCAN |
NL80211_RRF_NO_IBSS),
/* IEEE 802.11 channel 14 - Only JP enables
diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
index 8636585..04aa5c8 100644
--- a/security/selinux/netnode.c
+++ b/security/selinux/netnode.c
@@ -174,7 +174,8 @@ static void sel_netnode_insert(struct sel_netnode *node)
if (sel_netnode_hash[idx].size == SEL_NETNODE_HASH_BKT_LIMIT) {
struct sel_netnode *tail;
tail = list_entry(
- rcu_dereference(sel_netnode_hash[idx].list.prev),
+ rcu_dereference_protected(sel_netnode_hash[idx].list.prev,
+ lockdep_is_held(&sel_netnode_lock)),
struct sel_netnode, list);
list_del_rcu(&tail->list);
kfree_rcu(tail, rcu);
diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index 7143393..e23ad3f 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c
@@ -544,6 +544,7 @@ static int ad198x_build_pcms(struct hda_codec *codec)
if (spec->multiout.dig_out_nid) {
info++;
codec->num_pcms++;
+ codec->spdif_status_reset = 1;
info->name = "AD198x Digital";
info->pcm_type = HDA_PCM_TYPE_SPDIF;
info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ad198x_pcm_digital_playback;
diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c
index 2bc6c51..057f95a2 100644
--- a/sound/pci/hda/patch_cirrus.c
+++ b/sound/pci/hda/patch_cirrus.c
@@ -95,8 +95,8 @@ enum {
#define CS420X_VENDOR_NID 0x11
#define CS_DIG_OUT1_PIN_NID 0x10
#define CS_DIG_OUT2_PIN_NID 0x15
-#define CS_DMIC1_PIN_NID 0x12
-#define CS_DMIC2_PIN_NID 0x0e
+#define CS_DMIC1_PIN_NID 0x0e
+#define CS_DMIC2_PIN_NID 0x12

/* coef indices */
#define IDX_SPDIF_STAT 0x0000
@@ -1084,14 +1084,18 @@ static void init_input(struct hda_codec *codec)
cs_automic(codec);

coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */
+ cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
+
+ coef = cs_vendor_coef_get(codec, IDX_BEEP_CFG);
if (is_active_pin(codec, CS_DMIC2_PIN_NID))
- coef |= 0x0500; /* DMIC2 2 chan on, GPIO1 off */
+ coef |= 1 << 4; /* DMIC2 2 chan on, GPIO1 off */
if (is_active_pin(codec, CS_DMIC1_PIN_NID))
- coef |= 0x1800; /* DMIC1 2 chan on, GPIO0 off
+ coef |= 1 << 3; /* DMIC1 2 chan on, GPIO0 off
* No effect if SPDIF_OUT2 is
* selected in IDX_SPDIF_CTL.
*/
- cs_vendor_coef_set(codec, IDX_ADC_CFG, coef);
+
+ cs_vendor_coef_set(codec, IDX_BEEP_CFG, coef);
} else {
if (spec->mic_detect)
cs_automic(codec);
@@ -1112,7 +1116,7 @@ static const struct hda_verb cs_coef_init_verbs[] = {
| 0x0400 /* Disable Coefficient Auto increment */
)},
/* Beep */
- {0x11, AC_VERB_SET_COEF_INDEX, IDX_DAC_CFG},
+ {0x11, AC_VERB_SET_COEF_INDEX, IDX_BEEP_CFG},
{0x11, AC_VERB_SET_PROC_COEF, 0x0007}, /* Enable Beep thru DAC1/2/3 */

{} /* terminator */
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 6ecf1d4..257fe87 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -5458,6 +5458,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
SND_PCI_QUIRK(0x106b, 0x4000, "MacbookPro 5,1", ALC889_FIXUP_IMAC91_VREF),
SND_PCI_QUIRK(0x106b, 0x4100, "Macmini 3,1", ALC889_FIXUP_IMAC91_VREF),
SND_PCI_QUIRK(0x106b, 0x4200, "Mac Pro 5,1", ALC885_FIXUP_MACPRO_GPIO),
+ SND_PCI_QUIRK(0x106b, 0x4300, "iMac 9,1", ALC889_FIXUP_IMAC91_VREF),
SND_PCI_QUIRK(0x106b, 0x4600, "MacbookPro 5,2", ALC889_FIXUP_IMAC91_VREF),
SND_PCI_QUIRK(0x106b, 0x4900, "iMac 9,1 Aluminum", ALC889_FIXUP_IMAC91_VREF),
SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_IMAC91_VREF),
@@ -7047,6 +7048,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = {
.patch = patch_alc662 },
{ .id = 0x10ec0663, .name = "ALC663", .patch = patch_alc662 },
{ .id = 0x10ec0665, .name = "ALC665", .patch = patch_alc662 },
+ { .id = 0x10ec0668, .name = "ALC668", .patch = patch_alc662 },
{ .id = 0x10ec0670, .name = "ALC670", .patch = patch_alc662 },
{ .id = 0x10ec0680, .name = "ALC680", .patch = patch_alc680 },
{ .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 },
@@ -7064,6 +7066,7 @@ static const struct hda_codec_preset snd_hda_preset_realtek[] = {
{ .id = 0x10ec0889, .name = "ALC889", .patch = patch_alc882 },
{ .id = 0x10ec0892, .name = "ALC892", .patch = patch_alc662 },
{ .id = 0x10ec0899, .name = "ALC898", .patch = patch_alc882 },
+ { .id = 0x10ec0900, .name = "ALC1150", .patch = patch_alc882 },
{} /* terminator */
};

diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index 3998d09b..9dafacd 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c
@@ -1868,11 +1868,11 @@ static int via_auto_fill_dac_nids(struct hda_codec *codec)
{
struct via_spec *spec = codec->spec;
const struct auto_pin_cfg *cfg = &spec->autocfg;
- int i, dac_num;
+ int i;
hda_nid_t nid;

+ spec->multiout.num_dacs = 0;
spec->multiout.dac_nids = spec->private_dac_nids;
- dac_num = 0;
for (i = 0; i < cfg->line_outs; i++) {
hda_nid_t dac = 0;
nid = cfg->line_out_pins[i];
@@ -1883,16 +1883,13 @@ static int via_auto_fill_dac_nids(struct hda_codec *codec)
if (!i && parse_output_path(codec, nid, dac, 1,
&spec->out_mix_path))
dac = spec->out_mix_path.path[0];
- if (dac) {
- spec->private_dac_nids[i] = dac;
- dac_num++;
- }
+ if (dac)
+ spec->private_dac_nids[spec->multiout.num_dacs++] = dac;
}
if (!spec->out_path[0].depth && spec->out_mix_path.depth) {
spec->out_path[0] = spec->out_mix_path;
spec->out_mix_path.depth = 0;
}
- spec->multiout.num_dacs = dac_num;
return 0;
}

@@ -3668,6 +3665,18 @@ static void set_widgets_power_state_vt2002P(struct hda_codec *codec)
update_power_state(codec, 0x21, AC_PWRST_D3);
}

+/* NIDs 0x24 and 0x33 on VT1802 have connections to non-existing NID 0x3e
+ * Replace this with mixer NID 0x1c
+ */
+static void fix_vt1802_connections(struct hda_codec *codec)
+{
+ static hda_nid_t conn_24[] = { 0x14, 0x1c };
+ static hda_nid_t conn_33[] = { 0x1c };
+
+ snd_hda_override_conn_list(codec, 0x24, ARRAY_SIZE(conn_24), conn_24);
+ snd_hda_override_conn_list(codec, 0x33, ARRAY_SIZE(conn_33), conn_33);
+}
+
/* patch for vt2002P */
static int patch_vt2002P(struct hda_codec *codec)
{
@@ -3682,6 +3691,8 @@ static int patch_vt2002P(struct hda_codec *codec)
spec->aa_mix_nid = 0x21;
override_mic_boost(codec, 0x2b, 0, 3, 40);
override_mic_boost(codec, 0x29, 0, 3, 40);
+ if (spec->codec_type == VT1802)
+ fix_vt1802_connections(codec);
add_secret_dac_path(codec);

/* automatic parse from the BIOS config */
diff --git a/sound/soc/codecs/wm8978.c b/sound/soc/codecs/wm8978.c
index 72d5fdc..6c37c7c 100644
--- a/sound/soc/codecs/wm8978.c
+++ b/sound/soc/codecs/wm8978.c
@@ -783,7 +783,7 @@ static int wm8978_hw_params(struct snd_pcm_substream *substream,
wm8978->mclk_idx = -1;
f_sel = wm8978->f_mclk;
} else {
- if (!wm8978->f_pllout) {
+ if (!wm8978->f_opclk) {
/* We only enter here, if OPCLK is not used */
int ret = wm8978_configure_pll(codec);
if (ret < 0)
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index c41efe0..9ae82a4 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -3253,7 +3253,7 @@ void snd_soc_dapm_shutdown(struct snd_soc_card *card)
{
struct snd_soc_codec *codec;

- list_for_each_entry(codec, &card->codec_dev_list, list) {
+ list_for_each_entry(codec, &card->codec_dev_list, card_list) {
soc_dapm_shutdown_codec(&codec->dapm);
if (codec->dapm.bias_level == SND_SOC_BIAS_STANDBY)
snd_soc_dapm_set_bias_level(&codec->dapm,
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/