Re: per device queues for cciss 2.6.0

From: Jeff Garzik
Date: Tue Mar 09 2004 - 18:47:10 EST


Miller, Mike (OS Dev) wrote:
The command buffer as it is now is per hba. We realize that there may be issues with volumes being starved out but the change was done to make the current driver work with multiple logical volumes. When we move to per logical volume locking scheme we can also implement a per logical volume command structure.

The starvation problem is pretty easy to solve... see attached.

I was mainly asking about the hardware itself... does the _hardware_ have a single command buffer, or a per-device command buffer?

Jeff



#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/sched.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/interrupt.h>
#include <linux/compiler.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/hdreg.h>
#include <asm/io.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>

/* Module metadata declared through the kernel's MODULE_* macros. */
MODULE_AUTHOR("Jeff Garzik");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Promise SX8 (carmel) block driver");

/*
 * Debug switches.  Change "#if 0" to "#if 1" to define CARM_DEBUG and
 * CARM_VERBOSE_DEBUG, which turn on DPRINTK() and VPRINTK() output.
 */
#if 0
#define CARM_DEBUG
#define CARM_VERBOSE_DEBUG
#else
#undef CARM_DEBUG
#undef CARM_VERBOSE_DEBUG
#endif
/* CARM_NDEBUG stays undefined, so the driver's assert() remains active. */
#undef CARM_NDEBUG

/* Driver name, version string, and the prefix used on some log lines. */
#define DRV_NAME "carmel"
#define DRV_VERSION "0.5"
#define PFX DRV_NAME ": "

/*
 * Advance a response-ring index by one, wrapping at RMSG_Q_LEN.
 * The argument is parenthesized so that expression arguments
 * (e.g. "a | b") are evaluated before the increment.
 */
#define NEXT_RESP(idx) (((idx) + 1) % RMSG_Q_LEN)

/*
 * Message handles carry the request tag in bits 16 and up.
 * 0xf is just arbitrary, non-zero noise in the low nibble; this is
 * sorta like poisoning, and TAG_VALID() checks that it came back intact.
 * TAG_DECODE() masks the tag to 5 bits (0..31).
 */
#define TAG_ENCODE(tag) (((tag) << 16) | 0xf)
#define TAG_DECODE(tag) (((tag) >> 16) & 0x1f)
#define TAG_VALID(tag) ((((tag) & 0xf) == 0xf) && (TAG_DECODE(tag) < 32))

/*
 * Debug print helpers.  note: prints function name for you.
 * DPRINTK() is active when CARM_DEBUG is defined; VPRINTK() additionally
 * needs CARM_VERBOSE_DEBUG.  When disabled they expand to nothing, so
 * their arguments are not evaluated.
 */
#ifdef CARM_DEBUG
#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args)
#ifdef CARM_VERBOSE_DEBUG
#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __FUNCTION__, ## args)
#else
#define VPRINTK(fmt, args...)
#endif /* CARM_VERBOSE_DEBUG */
#else
#define DPRINTK(fmt, args...)
#define VPRINTK(fmt, args...)
#endif /* CARM_DEBUG */

/*
 * Non-fatal assertion: logs the failed expression with file, function and
 * line, then continues.  Compiled out when CARM_NDEBUG is defined.
 *
 * Both variants are wrapped in do { } while (0) so that "assert(x);"
 * behaves as a single statement, including inside an unbraced if/else.
 */
#ifdef CARM_NDEBUG
#define assert(expr) do { } while (0)
#else
#define assert(expr) \
	do { \
		if (unlikely(!(expr))) { \
			printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
			       #expr, __FILE__, __FUNCTION__, __LINE__); \
		} \
	} while (0)
#endif

/* defines only for the constants which don't work well as enums */
struct carm_host;

enum {
CARM_MAX_PORTS = 8,
CARM_SHM_SIZE = (4096 << 7),
CARM_MINORS_PER_MAJOR = 256 / CARM_MAX_PORTS,
CARM_MAX_REQ = 32,
CARM_MAX_SG = 32,
CARM_MAX_Q = 3, /* three TCQ tags per port */
CARM_MAX_WAIT_Q = CARM_MAX_PORTS + 1,

CARM_IHQP = 0x1c,
CARM_INT_STAT = 0x10,
CARM_INT_MASK = 0x14,
CARM_HMUC = 0x18,
RBUF_ADDR_LO = 0x20,
RBUF_ADDR_HI = 0x24,
RBUF_BYTE_SZ = 0x28,
CARM_RESP_IDX = 0x2c,
CARM_CMS0 = 0x30,
CARM_LMUC = 0x48,
CARM_HMPHA = 0x6c,
CARM_INITC = 0xb5,

INT_RESERVED = 0xfffffff0,
INT_WATCHDOG = (1 << 3),
INT_Q_OVERFLOW = (1 << 2),
INT_Q_AVAILABLE = (1 << 1),
INT_RESPONSE = (1 << 0),
INT_ACK_MASK = INT_WATCHDOG | INT_Q_OVERFLOW,
INT_DEF_MASK = INT_RESERVED | INT_Q_OVERFLOW |
INT_RESPONSE,

CARM_HAVE_RESP = 0x01,
CARM_MSG_READ = 1,
CARM_MSG_WRITE = 2,
CARM_MSG_IOCTL = 4,
CARM_MSG_ARRAY = 8,
CARM_MSG_MISC = 9,
CARM_CME = (1 << 2),
CARM_RME = (1 << 1),
CARM_WZBC = (1 << 0),
CARM_RMI = (1 << 0),
CARM_Q_FULL = (1 << 3),
CARM_MSG_SIZE = 288,
CARM_Q_LEN = 48,

/* CARM_MSG_IOCTL messages */
CARM_IOC_SCAN_CHAN = 5,

IOC_SCAN_CHAN_NODEV = 0x1f,

/* CARM_MSG_ARRAY messages */
CARM_ARRAY_INFO = 0,

ARRAY_NO_EXIST = (1 << 31),

RMSG_SZ = 8,
RMSG_Q_LEN = 48,
RMSG_OK = 1,
RBUF_LEN = RMSG_SZ * RMSG_Q_LEN,

PDC_SHM_SIZE = (4096 << 7),

MISC_GET_FW_VER = 2,
MISC_ALLOC_MEM = 3,
MISC_SYNC_TIME = 5,

FW_VER_4PORT = (1 << 2), /* 1=4 ports, 0=8 ports */
FW_VER_NON_RAID = (1 << 1), /* 1=non-RAID firmware, 0=RAID */
FW_VER_ZCR = (1 << 0), /* zero channel RAID (whatever that is) */

/* host flags */
FL_DAC = (1 << 0),
FL_NON_RAID = FW_VER_NON_RAID,
FL_4PORT = FW_VER_4PORT,
FL_FW_VER_MASK = (FW_VER_NON_RAID | FW_VER_4PORT),
};

/*
 * States of the probe state machine run by carm_fsm_task().
 * The numeric order is mirrored by state_name[]; keep both in sync.
 */
enum host_states {
HST_INVALID, /* invalid state; never used */
HST_ALLOC_BUF, /* setting up master SHM area */
HST_ERROR, /* we never leave here */
HST_PORT_SCAN, /* start dev scan */
HST_DEV_SCAN_START, /* start per-device probe */
HST_DEV_SCAN, /* continue per-device probe */
HST_DEV_ACTIVATE, /* activate devices we found */
HST_PROBE_FINISHED, /* probe is complete */
HST_PROBE_START, /* initiate probe */
HST_SYNC_TIME, /* tell firmware what time it is */
HST_GET_FW_VER, /* get firmware version, adapter port cnt */
};

#ifdef CARM_DEBUG
/*
 * Printable names for enum host_states, indexed by state value.
 * Only built for debug output (used by DPRINTK in carm_fsm_task).
 */
static const char *state_name[] = {
"HST_INVALID",
"HST_ALLOC_BUF",
"HST_ERROR",
"HST_PORT_SCAN",
"HST_DEV_SCAN_START",
"HST_DEV_SCAN",
"HST_DEV_ACTIVATE",
"HST_PROBE_FINISHED",
"HST_PROBE_START",
"HST_SYNC_TIME",
"HST_GET_FW_VER",
};
#endif

/* Per-port state; carm_host embeds CARM_MAX_PORTS of these. */
struct carm_port {
unsigned int port_no; /* index of this port within host->port[] */
unsigned int n_queued;
struct gendisk *disk; /* allocated in carm_init_disks() */
struct carm_host *host; /* owning host */

/* attached device characteristics */
u64 capacity; /* filled from the array-info reply; passed to set_capacity() */
char name[41]; /* device name from array info, NUL-terminated, trailing spaces trimmed */
u16 dev_geom_head;
u16 dev_geom_sect;
u16 dev_geom_cyl;
};

/* Driver-side bookkeeping for one in-flight message. */
struct carm_request {
unsigned int tag; /* index in host->req[]; also selects the message slot and the msg_alloc bit */
int n_elem; /* number of DMA-mapped scatterlist entries (0 for special requests) */
unsigned int msg_type; /* CARM_MSG_* code, used to dispatch the response */
unsigned int msg_subtype; /* subtype for non-read/write messages */
struct request *rq; /* block layer request this message serves */
struct carm_port *port; /* target port; NULL until set by carm_rq_fn() */
struct scatterlist sg[CARM_MAX_SG];
};

/* Per-adapter state, allocated in carm_init_one(). */
struct carm_host {
unsigned long flags; /* FL_* host flags */
void *mmio; /* ioremap()ed PCI resource 0 */
void *shm; /* shared memory from pci_alloc_consistent(); response ring at its start */
dma_addr_t shm_dma; /* bus address of shm */
int major; /* block major returned by register_blkdev() */
spinlock_t lock; /* also passed to blk_init_queue() as the queue lock */
struct pci_dev *pdev;
unsigned int state; /* enum host_states */
u32 fw_ver;

request_queue_t *oob_q; /* queue carrying driver-internal (special) messages */
unsigned int n_oob;

unsigned int resp_idx; /* running count of consumed response entries */

/* queues stopped by carm_push_q(), restarted in order by carm_pop_q() */
unsigned int wait_q_prod;
unsigned int wait_q_cons;
request_queue_t *wait_q[CARM_MAX_WAIT_Q];

unsigned long msg_alloc; /* bitmap of req[] entries in use */
struct carm_request req[CARM_MAX_REQ];
void *msg_base; /* shm + RBUF_LEN: start of message slots */
dma_addr_t msg_dma; /* bus address of msg_base */

int cur_scan_dev; /* port currently being probed, -1 before the first */
unsigned long dev_active; /* bitmap of ports that returned array info */
unsigned long dev_present; /* bitmap of ports reported by the channel scan */
struct carm_port port[CARM_MAX_PORTS];

struct work_struct fsm_task; /* runs carm_fsm_task() */

struct semaphore probe_sem; /* released when the probe reaches HST_PROBE_FINISHED */
};

/*
 * One entry of the response ring at the start of host->shm.
 * carm_handle_responses() treats status 0xffffffff as "empty".
 */
struct carm_response {
u32 ret_handle; /* little-endian handle, decoded with TAG_DECODE() */
u32 status;
} __attribute__((packed));

/* Scatter/gather element embedded in messages: bus address and length. */
struct carm_msg_sg {
u32 start;
u32 len;
} __attribute__((packed));

/* Read/write message built by carm_rq_fn(). */
struct carm_msg_rw {
u8 type; /* CARM_MSG_READ or CARM_MSG_WRITE */
u8 id; /* port number */
u8 sg_count; /* number of valid entries in sg[] */
u8 sg_type;
u32 handle; /* TAG_ENCODE()d request tag */
u32 lba; /* low 32 bits of the start sector */
u16 lba_count; /* number of sectors */
u16 lba_high; /* bits 32..47 of the start sector */
struct carm_msg_sg sg[32];
} __attribute__((packed));

/* MISC_ALLOC_MEM message, filled by carm_fill_alloc_buf(). */
struct carm_msg_allocbuf {
u8 type;
u8 subtype;
u8 n_sg;
u8 sg_type;
u32 handle;
u32 addr;
u32 len;
u32 evt_pool;
u32 n_evt;
u32 rbuf_pool;
u32 n_rbuf;
u32 msg_pool;
u32 n_msg;
struct carm_msg_sg sg[8];
} __attribute__((packed));

/*
 * Generic ioctl-style message; used for the channel scan and the
 * array-info query.  carm_send_special() also reads type/subtype through
 * this layout for every special message.
 */
struct carm_msg_ioctl {
u8 type;
u8 subtype;
u8 array_id;
u8 reserved1;
u32 handle;
u32 data_addr; /* bus address where the reply data is expected */
u32 reserved2;
} __attribute__((packed));

/* MISC_SYNC_TIME message, filled by carm_fill_sync_time(). */
struct carm_msg_sync_time {
u8 type;
u8 subtype;
u16 reserved1;
u32 handle;
u32 reserved2;
u32 timestamp; /* seconds from do_gettimeofday() */
} __attribute__((packed));

/* MISC_GET_FW_VER message, filled by carm_fill_get_fw_ver(). */
struct carm_msg_get_fw_ver {
u8 type;
u8 subtype;
u16 reserved1;
u32 handle;
u32 data_addr; /* bus address of the byte right after this message */
u32 reserved2;
} __attribute__((packed));

/* Reply data for MISC_GET_FW_VER. */
struct carm_fw_ver {
u32 version;
u8 features; /* FW_VER_* bits */
u8 reserved1;
u16 reserved2;
} __attribute__((packed));

/* Reply data for CARM_ARRAY_INFO, parsed by carm_handle_array_info(). */
struct carm_array_info {
u32 size; /* low 32 bits of the capacity */

u16 size_hi; /* capacity bits 32 and up */
u16 stripe_size;

u32 mode;

u16 stripe_blk_sz;
u16 reserved1;

u16 cyl;
u16 head;

u16 sect;
u8 array_id;
u8 reserved2;

char name[40]; /* fixed-width field; the parser does not assume a NUL terminator */

u32 array_status; /* ARRAY_NO_EXIST set when the array is absent */

/* device list continues beyond this point? */
} __attribute__((packed));

/* Forward declarations for the callbacks referenced by the tables below. */
static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
static void carm_remove_one (struct pci_dev *pdev);
static int carm_bdev_ioctl(struct inode *ino, struct file *fil,
unsigned int cmd, unsigned long arg);

/* PCI IDs claimed by this driver: Promise devices 0x8000 and 0x8002. */
static struct pci_device_id carm_pci_tbl[] = {
{ PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
{ PCI_VENDOR_ID_PROMISE, 0x8002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
{ } /* terminate list */
};
MODULE_DEVICE_TABLE(pci, carm_pci_tbl);

/* PCI driver glue: probe/remove entry points for matching devices. */
static struct pci_driver carm_driver = {
.name = DRV_NAME,
.id_table = carm_pci_tbl,
.probe = carm_init_one,
.remove = carm_remove_one,
};

/* Block device operations; only ioctl is provided. */
static struct block_device_operations carm_bd_ops = {
.owner = THIS_MODULE,
.ioctl = carm_bdev_ioctl,
};

/* Number of hosts registered so far; used in disk names by carm_init_disks(). */
static unsigned int carm_host_id;



static int carm_bdev_ioctl(struct inode *ino, struct file *fil,
unsigned int cmd, unsigned long arg)
{
void __user *usermem = (void *) arg;
struct carm_port *port = ino->i_bdev->bd_disk->private_data;
struct hd_geometry geom;

switch (cmd) {
case HDIO_GETGEO:
if (!usermem)
return -EINVAL;

geom.heads = (u8) port->dev_geom_head;
geom.sectors = (u8) port->dev_geom_sect;
geom.cylinders = port->dev_geom_cyl;
geom.start = get_start_sect(ino->i_bdev);

if (copy_to_user(usermem, &geom, sizeof(geom)))
return -EFAULT;
return 0;

default:
break;
}

return -EOPNOTSUPP;
}

/* Convert milliseconds to timer ticks, rounding up to a whole tick. */
static inline unsigned long msecs_to_jiffies(unsigned long msecs)
{
	unsigned long scaled = HZ * msecs;

	return (scaled + 999) / 1000;
}

/* Sleep uninterruptibly for at least @msecs milliseconds. */
static void msleep(unsigned long msecs)
{
	unsigned long ticks = msecs_to_jiffies(msecs);

	set_current_state(TASK_UNINTERRUPTIBLE);
	schedule_timeout(ticks);
}

/* Return the CPU address of message slot @msg_idx. */
static inline void *carm_ref_msg(struct carm_host *host,
				 unsigned int msg_idx)
{
	void *slot = host->msg_base;

	slot += msg_idx * CARM_MSG_SIZE;
	return slot;
}

/* Return the bus address of message slot @msg_idx. */
static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host,
					  unsigned int msg_idx)
{
	dma_addr_t slot = host->msg_dma;

	slot += msg_idx * CARM_MSG_SIZE;
	return slot;
}

/*
 * Hand one message to the adapter.
 *
 * @msg is the bus address of the message slot and @cms_type is OR-ed into
 * it before the value is written to CARM_IHQP.  Returns 0 on success or
 * -EBUSY when CARM_HMUC reports the queue as full (nothing is written).
 */
static int carm_send_msg(struct carm_host *host, dma_addr_t msg, u32 cms_type)
{
	void *regs = host->mmio;
	u32 hmuc;

	VPRINTK("ENTER\n");

	hmuc = readl(regs + CARM_HMUC);
	if (hmuc & CARM_Q_FULL) {
#if 0
		hmuc = readl(regs + CARM_INT_MASK);
		hmuc |= INT_Q_AVAILABLE;
		writel(hmuc, regs + CARM_INT_MASK);
		readl(regs + CARM_INT_MASK); /* flush */
#endif
		DPRINTK("host msg queue full\n");
		return -EBUSY;
	}

	writel(msg | cms_type, regs + CARM_IHQP);
	readl(regs + CARM_IHQP);	/* flush */

	return 0;
}

/*
 * Claim a free request slot.
 *
 * Scans every slot except the last entry of host->req[] and atomically
 * marks the first free one in host->msg_alloc.  The claimed slot has its
 * port and n_elem reset.  Returns NULL when no slot is free.
 */
static struct carm_request *carm_get_request(struct carm_host *host)
{
	const unsigned int limit = ARRAY_SIZE(host->req) - 1;
	unsigned int slot = 0;

	while (slot < limit) {
		if (!test_and_set_bit(slot, &host->msg_alloc)) {
			struct carm_request *found = &host->req[slot];

			found->port = NULL;
			found->n_elem = 0;
			return found;
		}
		slot++;
	}

	DPRINTK("no request available, returning NULL\n");
	return NULL;
}

/*
 * Release a request slot.  Returns 0 on success, or -EINVAL if the
 * slot's tag was not marked active in host->msg_alloc.
 */
static int carm_put_request(struct carm_host *host, struct carm_request *crq)
{
	int was_active = test_and_clear_bit(crq->tag, &host->msg_alloc);

	/* nothing else to tear down yet */
	return was_active ? 0 : -EINVAL;
}

/*
 * Allocate a request slot plus a block-layer request on the OOB queue for
 * a driver-internal message.
 *
 * Retries the slot allocation up to 5000 times, sleeping 10 ms after each
 * failed attempt.  Returns NULL if no slot becomes free or if
 * blk_get_request() fails (the slot is released again in that case).
 * Sleeps, so it must be called from process context.
 */
static struct carm_request *carm_get_special(struct carm_host *host)
{
	struct carm_request *crq = NULL;
	struct request *rq;
	unsigned long flags;
	int attempt;

	for (attempt = 0; attempt < 5000; attempt++) {
		spin_lock_irqsave(&host->lock, flags);
		crq = carm_get_request(host);
		spin_unlock_irqrestore(&host->lock, flags);

		if (crq)
			break;
		msleep(10);
	}

	if (!crq)
		return NULL;

	rq = blk_get_request(host->oob_q, WRITE /* bogus */, GFP_KERNEL);
	if (rq) {
		crq->rq = rq;
		return crq;
	}

	/* could not get a block request: give the slot back */
	spin_lock_irqsave(&host->lock, flags);
	carm_put_request(host, crq);
	spin_unlock_irqrestore(&host->lock, flags);
	return NULL;
}

/*
 * Queue a CARM_ARRAY_INFO query for array @array_idx on the OOB queue.
 *
 * The reply data address is set to the message slot's bus address plus
 * sizeof(struct carm_array_info), which is the offset
 * carm_handle_array_info() parses from.  Returns 0 once the request is
 * queued.  If no request can be obtained, the host state is set to
 * HST_ERROR and -ENOMEM is returned.
 */
static int carm_array_info (struct carm_host *host, unsigned int array_idx)
{
	struct carm_request *crq = carm_get_special(host);
	struct carm_msg_ioctl *query;
	unsigned int tag;
	dma_addr_t slot_dma;
	u32 reply_addr;

	if (!crq) {
		spin_lock_irq(&host->lock);
		host->state = HST_ERROR;
		spin_unlock_irq(&host->lock);
		return -ENOMEM;
	}

	tag = crq->tag;
	query = carm_ref_msg(host, tag);
	slot_dma = carm_ref_msg_dma(host, tag);
	reply_addr = (u32) (slot_dma + sizeof(struct carm_array_info));

	crq->msg_type = CARM_MSG_ARRAY;
	crq->msg_subtype = CARM_ARRAY_INFO;

	memset(query, 0, sizeof(*query));
	query->type = CARM_MSG_ARRAY;
	query->subtype = CARM_ARRAY_INFO;
	query->array_id = (u8) array_idx;
	query->handle = cpu_to_le32(TAG_ENCODE(tag));
	query->data_addr = cpu_to_le32(reply_addr);

	spin_lock_irq(&host->lock);
	assert(host->state == HST_DEV_SCAN_START ||
	       host->state == HST_DEV_SCAN);
	spin_unlock_irq(&host->lock);

	DPRINTK("blk_insert_request, tag == %u\n", tag);
	blk_insert_request(host->oob_q, crq->rq, 1, crq, 0);

	return 0;
}

/*
 * Message fill callback used by carm_send_special(): receives the host,
 * the request tag, and a pointer to the message slot to populate.
 */
typedef void (*carm_sspc_t)(struct carm_host *, unsigned int, void *);

/*
 * Build and queue a driver-internal message.
 *
 * @func fills the message slot; the type and subtype it wrote are then
 * recorded in the request so the response can be dispatched.  Returns 0
 * once queued on the OOB queue, or -ENOMEM if no request is available.
 */
static int carm_send_special (struct carm_host *host, carm_sspc_t func)
{
	struct carm_request *crq = carm_get_special(host);
	struct carm_msg_ioctl *hdr;
	unsigned int tag;

	if (!crq)
		return -ENOMEM;

	tag = crq->tag;
	hdr = carm_ref_msg(host, tag);

	func(host, tag, hdr);

	/* every special message starts with type/subtype bytes */
	crq->msg_type = hdr->type;
	crq->msg_subtype = hdr->subtype;

	DPRINTK("blk_insert_request, tag == %u\n", tag);
	blk_insert_request(host->oob_q, crq->rq, 1, crq, 0);

	return 0;
}

static void carm_fill_sync_time(struct carm_host *host, unsigned int idx,
void *mem)
{
struct timeval tv;
struct carm_msg_sync_time *st = mem;

do_gettimeofday(&tv);

memset(st, 0, sizeof(*st));
st->type = CARM_MSG_MISC;
st->subtype = MISC_SYNC_TIME;
st->handle = cpu_to_le32(TAG_ENCODE(idx));
st->timestamp = cpu_to_le32(tv.tv_sec);
}

/*
 * Fill @mem with a MISC_ALLOC_MEM message describing how the shared
 * memory area is laid out: response buffers at its start, the message
 * pool after RBUF_LEN bytes, the event pool at offset 16 KiB, and a data
 * buffer in the upper half.
 */
static void carm_fill_alloc_buf(struct carm_host *host, unsigned int idx,
				void *mem)
{
	struct carm_msg_allocbuf *msg = mem;

	memset(msg, 0, sizeof(*msg));

	msg->type = CARM_MSG_MISC;
	msg->subtype = MISC_ALLOC_MEM;
	msg->handle = cpu_to_le32(TAG_ENCODE(idx));

	/* upper half of the shared area */
	msg->addr = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
	msg->len = cpu_to_le32(PDC_SHM_SIZE >> 1);

	/* pools in the lower half */
	msg->rbuf_pool = cpu_to_le32(host->shm_dma);
	msg->n_rbuf = cpu_to_le32(RMSG_Q_LEN);
	msg->msg_pool = cpu_to_le32(host->shm_dma + RBUF_LEN);
	msg->n_msg = cpu_to_le32(CARM_Q_LEN);
	msg->evt_pool = cpu_to_le32(host->shm_dma + (16 * 1024));
	msg->n_evt = cpu_to_le32(1024);

	/* single s/g entry */
	msg->n_sg = 1;
	msg->sg_type = 0; /* FIXME: correct? */
	msg->sg[0].start = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
	msg->sg[0].len = cpu_to_le32(65536);
}

static void carm_fill_scan_channels(struct carm_host *host, unsigned int idx,
void *mem)
{
struct carm_msg_ioctl *ioc = mem;
u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) +
sizeof(struct carm_msg_ioctl));

memset(ioc, 0, sizeof(*ioc));
ioc->type = CARM_MSG_IOCTL;
ioc->subtype = CARM_IOC_SCAN_CHAN;
ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
ioc->data_addr = cpu_to_le32(msg_data);

/* fill output data area with "no device" default values */
mem += sizeof(struct carm_msg_ioctl);
memset(mem, IOC_SCAN_CHAN_NODEV, CARM_MAX_PORTS);
}

static void carm_fill_get_fw_ver(struct carm_host *host, unsigned int idx,
void *mem)
{
struct carm_msg_get_fw_ver *ioc = mem;
u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) + sizeof(*ioc));

memset(ioc, 0, sizeof(*ioc));
ioc->type = CARM_MSG_MISC;
ioc->subtype = MISC_GET_FW_VER;
ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
ioc->data_addr = cpu_to_le32(msg_data);
}

/*
 * Complete the block request attached to @crq in full, then release the
 * request slot.  Unexpected results from either step are only reported
 * via assert().
 */
static inline void carm_end_request_queued(struct carm_host *host,
					   struct carm_request *crq,
					   int uptodate)
{
	struct request *blk_rq = crq->rq;
	int leftover, put_rc;

	leftover = end_that_request_first(blk_rq, uptodate,
					  blk_rq->hard_nr_sectors);
	assert(leftover == 0);

	end_that_request_last(blk_rq);

	put_rc = carm_put_request(host, crq);
	assert(put_rc == 0);
}

/*
 * Stop @q and append it to the host's ring of waiting queues, to be
 * restarted later by carm_round_robin().
 */
static inline void carm_push_q (struct carm_host *host, request_queue_t *q)
{
	unsigned int slot;

	blk_stop_queue(q);
	VPRINTK("STOPPED QUEUE %p\n", q);

	slot = host->wait_q_prod % CARM_MAX_WAIT_Q;
	host->wait_q[slot] = q;
	host->wait_q_prod += 1;

	BUG_ON(host->wait_q_prod == host->wait_q_cons); /* overrun */
}

/* Remove and return the oldest waiting queue, or NULL if none is waiting. */
static inline request_queue_t *carm_pop_q(struct carm_host *host)
{
	request_queue_t *oldest = NULL;

	if (host->wait_q_prod != host->wait_q_cons) {
		oldest = host->wait_q[host->wait_q_cons % CARM_MAX_WAIT_Q];
		host->wait_q_cons++;
	}

	return oldest;
}

/* Restart the longest-waiting stopped queue, if there is one. */
static inline void carm_round_robin(struct carm_host *host)
{
	request_queue_t *waiter = carm_pop_q(host);

	if (!waiter)
		return;

	blk_start_queue(waiter);
	VPRINTK("STARTED QUEUE %p\n", waiter);
}

/*
 * Finish @crq (completing its block request and freeing the slot), then
 * give one waiting queue a chance to run now that a slot is free.
 */
static inline void carm_end_rq(struct carm_host *host, struct carm_request *crq,
			       int is_ok)
{
	/* complete first so the freed slot is visible to the restarted queue */
	carm_end_request_queued(host, crq, is_ok);

	carm_round_robin(host);
}

/*
 * Request function for the OOB queue.
 *
 * Each queued request carries its carm_request in rq->special and its
 * message is already built, so this only submits it.  If the adapter
 * queue is full the request is put back and the queue is parked until a
 * completion restarts it.
 */
static void carm_oob_rq_fn(request_queue_t *q)
{
	struct carm_host *host = q->queuedata;

	for (;;) {
		struct carm_request *crq;
		struct request *rq;

		DPRINTK("get req\n");
		rq = elv_next_request(q);
		if (!rq)
			return;

		blkdev_dequeue_request(rq);

		crq = rq->special;
		assert(crq != NULL);
		assert(crq->rq == rq);

		crq->n_elem = 0;

		/* TODO: is cms_type always 4? */
		DPRINTK("send req\n");
		if (carm_send_msg(host, carm_ref_msg_dma(host, crq->tag), 4)) {
			blk_requeue_request(q, rq);
			carm_push_q(host, q);
			return; /* call us again later, eventually */
		}
	}
}

/*
 * Request function for the per-port queues.
 *
 * Pulls requests off @q one at a time, maps their data for DMA, builds a
 * read/write message in the slot selected by the request tag, and submits
 * it to the adapter.  Stops when the queue is empty, when no request slot
 * is free, or when the adapter queue is full; in the latter two cases the
 * queue is parked via carm_push_q() until a completion restarts it.
 */
static void carm_rq_fn(request_queue_t *q)
{
	struct carm_port *port = q->queuedata;
	struct carm_host *host = port->host;
	struct carm_msg_rw *msg;
	struct carm_request *crq;
	struct request *rq;
	struct scatterlist *sg;
	int writing, pci_dir, i, n_elem, n_mapped, rc;
	u32 cms_type, tmp;

queue_one_request:
	VPRINTK("get req\n");
	rq = elv_next_request(q);
	if (!rq)
		return;

	crq = carm_get_request(host);
	if (!crq) {
		carm_push_q(host, q);
		return; /* call us again later, eventually */
	}
	crq->rq = rq;

	blkdev_dequeue_request(rq);

	/*
	 * The direction must be recomputed for every request: this function
	 * loops via goto, so a value left over from a previous write would
	 * otherwise turn a following read into a write message.
	 */
	writing = (rq_data_dir(rq) == WRITE);
	pci_dir = writing ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;

	/* get scatterlist from block layer */
	sg = &crq->sg[0];
	n_elem = blk_rq_map_sg(q, rq, sg);
	if (n_elem <= 0) {
		carm_end_rq(host, crq, 0);
		return; /* request with no s/g entries? */
	}

	/* map scatterlist to PCI bus addresses */
	n_mapped = pci_map_sg(host->pdev, sg, n_elem, pci_dir);
	if (n_mapped <= 0) {
		carm_end_rq(host, crq, 0);
		return; /* request with no s/g entries? */
	}
	crq->n_elem = n_mapped;
	crq->port = port;

	/*
	 * build read/write message
	 */

	VPRINTK("build msg\n");
	msg = (struct carm_msg_rw *) carm_ref_msg(host, crq->tag);

	if (writing) {
		msg->type = CARM_MSG_WRITE;
		crq->msg_type = CARM_MSG_WRITE;
	} else {
		msg->type = CARM_MSG_READ;
		crq->msg_type = CARM_MSG_READ;
	}

	msg->id = port->port_no;
	msg->sg_count = n_mapped;
	msg->sg_type = 0;
	msg->handle = cpu_to_le32(TAG_ENCODE(crq->tag));
	msg->lba = cpu_to_le32(rq->sector & 0xffffffff);
	/* two 16-bit shifts stay well-defined even if sector_t is 32 bits */
	tmp = (rq->sector >> 16) >> 16;
	msg->lba_high = cpu_to_le16( (u16) tmp );
	msg->lba_count = cpu_to_le16(rq->nr_sectors);

	for (i = 0; i < n_mapped; i++) {
		struct carm_msg_sg *carm_sg = &msg->sg[i];
		carm_sg->start = cpu_to_le32(sg_dma_address(&crq->sg[i]));
		carm_sg->len = cpu_to_le32(sg_dma_len(&crq->sg[i]));
	}

	/* some magic :( document and/or figure out how to do better */
	if (n_mapped < 3)
		cms_type = 0;
	else if (n_mapped < 7)
		cms_type = 2;
	else if (n_mapped < 15)
		cms_type = 4;
	else
		cms_type = 6;

	/*
	 * queue read/write message to hardware
	 */

	VPRINTK("send msg, tag == %u\n", crq->tag);
	rc = carm_send_msg(host, carm_ref_msg_dma(host, crq->tag), cms_type);
	if (rc) {
		/*
		 * The request goes back to the block layer and will be set
		 * up from scratch on the next run, so undo the DMA mapping
		 * and release the slot here; otherwise both are leaked.
		 */
		pci_unmap_sg(host->pdev, sg, n_elem, pci_dir);
		carm_put_request(host, crq);
		blk_requeue_request(q, rq);
		carm_push_q(host, q);
		return; /* call us again later, eventually */
	}

	goto queue_one_request;
}

/*
 * Handle the reply to a CARM_ARRAY_INFO query.
 *
 * Completes the request, and on success records capacity, geometry and
 * name for the port currently being scanned (host->cur_scan_dev) and
 * marks it active.  In every case the state machine work item is
 * scheduled again so the scan continues with the next port.
 *
 * @mem points at the message slot; the reply data is read at offset
 * sizeof(struct carm_array_info), matching what carm_array_info() asked for.
 */
static void carm_handle_array_info(struct carm_host *host,
				   struct carm_request *crq, u8 *mem,
				   int is_ok)
{
	struct carm_port *port;
	u8 *msg_data = mem + sizeof(struct carm_array_info);
	struct carm_array_info *desc = (struct carm_array_info *) msg_data;
	u64 lo, hi;
	int cur_port;
	size_t slen;

	DPRINTK("ENTER\n");

	carm_end_rq(host, crq, is_ok);

	if (!is_ok)
		goto out;
	if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST)
		goto out;

	cur_port = host->cur_scan_dev;

	/* should never occur */
	if ((cur_port < 0) || (cur_port >= CARM_MAX_PORTS)) {
		printk(KERN_ERR PFX "BUG: cur_scan_dev==%d, array_id==%d\n",
		       cur_port, (int) desc->array_id);
		goto out;
	}

	port = &host->port[cur_port];

	lo = (u64) le32_to_cpu(desc->size);
	hi = (u64) le32_to_cpu(desc->size_hi);

	port->capacity = lo | (hi << 32);
	port->dev_geom_head = le16_to_cpu(desc->head);
	port->dev_geom_sect = le16_to_cpu(desc->sect);
	port->dev_geom_cyl = le16_to_cpu(desc->cyl);

	host->dev_active |= (1 << cur_port);

	/*
	 * desc->name is a fixed-width field that may lack a terminator, so
	 * copy at most its own size (never reading past it) and terminate
	 * explicitly.
	 */
	slen = sizeof(desc->name);
	if (slen > sizeof(port->name) - 1)
		slen = sizeof(port->name) - 1;
	strncpy(port->name, desc->name, slen);
	port->name[slen] = 0;

	/* strip trailing space padding */
	slen = strlen(port->name);
	while (slen && (port->name[slen - 1] == ' ')) {
		port->name[slen - 1] = 0;
		slen--;
	}

	/* u64 is not "unsigned long long" on every arch; cast for %Lu */
	printk(KERN_INFO DRV_NAME "(%s): port %u device %Lu sectors\n",
	       pci_name(host->pdev), port->port_no,
	       (unsigned long long) port->capacity);
	printk(KERN_INFO DRV_NAME "(%s): port %u device \"%s\"\n",
	       pci_name(host->pdev), port->port_no, port->name);

out:
	assert(host->state == HST_DEV_SCAN);
	schedule_work(&host->fsm_task);
}

/*
 * Handle the reply to a CARM_IOC_SCAN_CHAN message.
 *
 * The reply area follows the message header and holds one byte per port;
 * a value of 0 marks a direct-access device, which is recorded in
 * host->dev_present.  Moves the state machine to HST_DEV_SCAN_START on
 * success or HST_ERROR on failure and schedules it to run.
 */
static void carm_handle_scan_chan(struct carm_host *host,
				  struct carm_request *crq, u8 *mem,
				  int is_ok)
{
	const u8 *port_type = mem + sizeof(struct carm_msg_ioctl);
	unsigned int port_no;
	unsigned int n_found = 0;
	int next_state;

	DPRINTK("ENTER\n");

	carm_end_rq(host, crq, is_ok);

	if (is_ok) {
		next_state = HST_DEV_SCAN_START;

		/* TODO: scan and support non-disk devices */
		for (port_no = 0; port_no < CARM_MAX_PORTS; port_no++) {
			if (port_type[port_no] != 0)
				continue;

			/* direct-access device (disk) */
			host->dev_present |= (1 << port_no);
			n_found++;
		}

		printk(KERN_INFO DRV_NAME "(%s): found %u interesting devices\n",
		       pci_name(host->pdev), n_found);
	} else {
		next_state = HST_ERROR;
	}

	assert(host->state == HST_PORT_SCAN);
	host->state = next_state;
	schedule_work(&host->fsm_task);
}

/*
 * Common completion for probe messages that carry no data to parse:
 * finish the request, move the host from @cur_state to @next_state (or
 * to HST_ERROR when the message failed), and run the state machine again.
 */
static void carm_handle_generic(struct carm_host *host,
				struct carm_request *crq, int is_ok,
				int cur_state, int next_state)
{
	DPRINTK("ENTER\n");

	carm_end_rq(host, crq, is_ok);

	assert(host->state == cur_state);
	host->state = is_ok ? next_state : HST_ERROR;

	schedule_work(&host->fsm_task);
}

/*
 * Completion for read/write messages: undo the DMA mapping in the
 * direction the request used, then finish the request.
 */
static inline void carm_handle_rw(struct carm_host *host,
				  struct carm_request *crq, int is_ok)
{
	int is_write = (rq_data_dir(crq->rq) == WRITE);
	int dma_dir = is_write ? PCI_DMA_TODEVICE : PCI_DMA_FROMDEVICE;

	VPRINTK("ENTER\n");

	pci_unmap_sg(host->pdev, &crq->sg[0], crq->n_elem, dma_dir);

	carm_end_rq(host, crq, is_ok);
}

/*
 * Dispatch one response from the adapter.
 *
 * @ret_handle_le is the little-endian handle echoed back by the adapter;
 * it is validated and decoded into the request tag.  @status is the
 * CPU-endian status word; RMSG_OK means success.  Read/write completions
 * take the fast path; everything else is routed by the message type and
 * subtype recorded in the request.  Unknown types are logged and the
 * request is failed.
 */
static inline void carm_handle_resp(struct carm_host *host,
				    u32 ret_handle_le, u32 status)
{
	u32 handle = le32_to_cpu(ret_handle_le);
	unsigned int msg_idx;
	struct carm_request *crq;
	int is_ok = (status == RMSG_OK);
	u8 *mem;

	VPRINTK("ENTER, handle == 0x%x\n", handle);

	if (unlikely(!TAG_VALID(handle))) {
		printk(KERN_ERR DRV_NAME "(%s): BUG: invalid tag 0x%x\n",
		       pci_name(host->pdev), handle);
		return;
	}

	msg_idx = TAG_DECODE(handle);
	VPRINTK("tag == %u\n", msg_idx);

	crq = &host->req[msg_idx];

	/* fast path */
	if (likely(crq->msg_type == CARM_MSG_READ ||
		   crq->msg_type == CARM_MSG_WRITE)) {
		carm_handle_rw(host, crq, is_ok);
		return;
	}

	mem = carm_ref_msg(host, msg_idx);

	switch (crq->msg_type) {
	case CARM_MSG_IOCTL: {
		switch (crq->msg_subtype) {
		case CARM_IOC_SCAN_CHAN:
			carm_handle_scan_chan(host, crq, mem, is_ok);
			break;
		default:
			/* unknown / invalid response */
			goto err_out;
		}
		break;
	}

	case CARM_MSG_MISC: {
		switch (crq->msg_subtype) {
		case MISC_ALLOC_MEM:
			carm_handle_generic(host, crq, is_ok,
					    HST_ALLOC_BUF, HST_SYNC_TIME);
			break;
		case MISC_SYNC_TIME:
			carm_handle_generic(host, crq, is_ok,
					    HST_SYNC_TIME, HST_GET_FW_VER);
			break;
		case MISC_GET_FW_VER: {
			/*
			 * The reply sits right after the request header, as
			 * requested by carm_fill_get_fw_ver().  The byte
			 * offset must be added BEFORE the cast; adding it to
			 * the cast pointer would scale it by the struct size.
			 */
			struct carm_fw_ver *ver = (struct carm_fw_ver *)
				(mem + sizeof(struct carm_msg_get_fw_ver));
			if (is_ok) {
				host->fw_ver = le32_to_cpu(ver->version);
				host->flags |= (ver->features & FL_FW_VER_MASK);
			}
			carm_handle_generic(host, crq, is_ok,
					    HST_GET_FW_VER, HST_PORT_SCAN);
			break;
		}
		default:
			/* unknown / invalid response */
			goto err_out;
		}
		break;
	}

	case CARM_MSG_ARRAY: {
		switch (crq->msg_subtype) {
		case CARM_ARRAY_INFO:
			carm_handle_array_info(host, crq, mem, is_ok);
			break;
		default:
			/* unknown / invalid response */
			goto err_out;
		}
		break;
	}

	default:
		/* unknown / invalid response */
		goto err_out;
	}

	return;

err_out:
	printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
	       pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
	carm_end_rq(host, crq, 0);
}

/*
 * Drain the response ring.
 *
 * Walks entries starting at the saved index until one with status
 * 0xffffffff (empty) is found; that index, shifted left by 3, is then
 * written to CARM_RESP_IDX.  Entries with bit 31 clear are message
 * responses; entries whose top byte is exactly 0x80 are logged as
 * unhandled events.  Both kinds are marked empty again after processing.
 */
static inline void carm_handle_responses(struct carm_host *host)
{
	struct carm_response *ring = (struct carm_response *) host->shm;
	void *regs = host->mmio;
	unsigned int idx = host->resp_idx % RMSG_Q_LEN;
	unsigned int n_done;

	for (n_done = 0; ; n_done++, idx = NEXT_RESP(idx)) {
		struct carm_response *ent = &ring[idx];
		u32 status = le32_to_cpu(ent->status);

		if (status == 0xffffffff) {
			VPRINTK("ending response on index %u\n", idx);
			writel(idx << 3, regs + CARM_RESP_IDX);
			break;
		}

		if ((status & (1 << 31)) == 0) {
			/* response to a message we sent */
			VPRINTK("handling msg response on index %u\n", idx);
			carm_handle_resp(host, ent->ret_handle, status);
			ent->status = 0xffffffff;
		} else if ((status & 0xff000000) == (1 << 31)) {
			/* asynchronous events the hardware throws our way */
			u8 evt_type = *(u8 *) ent;

			printk(KERN_WARNING DRV_NAME "(%s): unhandled event type %d\n",
			       pci_name(host->pdev), (int) evt_type);
			ent->status = 0xffffffff;
		}
	}

	VPRINTK("EXIT, work==%u\n", n_done);
	host->resp_idx += n_done;
}

/*
 * Interrupt handler.
 *
 * Reads the interrupt status under the host lock, writes it back when any
 * INT_ACK_MASK bit is set, and processes the response ring when the
 * response bit is set.  Nothing is processed while the host is still in
 * HST_INVALID.  Reports the interrupt as handled only when responses were
 * processed.
 */
static irqreturn_t carm_interrupt(int irq, void *__host, struct pt_regs *regs)
{
	struct carm_host *host = __host;
	int handled = 0;
	void *mmio;
	u32 mask;

	if (!host) {
		VPRINTK("no host\n");
		return IRQ_NONE;
	}

	spin_lock(&host->lock);

	mmio = host->mmio;

	/* reading should also clear interrupts */
	mask = readl(mmio + CARM_INT_STAT);

	if (mask == 0 || mask == 0xffffffff) {
		VPRINTK("no work, mask == 0x%x\n", mask);
	} else {
		if (mask & INT_ACK_MASK)
			writel(mask, mmio + CARM_INT_STAT);

		if (unlikely(host->state == HST_INVALID)) {
			VPRINTK("not initialized yet, mask = 0x%x\n", mask);
		} else if (mask & CARM_HAVE_RESP) {
			handled = 1;
			carm_handle_responses(host);
		}
	}

	spin_unlock(&host->lock);
	VPRINTK("EXIT\n");
	return IRQ_RETVAL(handled);
}

static void carm_fsm_task (void *_data)
{
struct carm_host *host = _data;
unsigned long flags;
unsigned int state;
int rc, i, next_dev;
int reschedule = 0;
int new_state = HST_INVALID;

spin_lock_irqsave(&host->lock, flags);
state = host->state;
spin_unlock_irqrestore(&host->lock, flags);

DPRINTK("ENTER, state == %s\n", state_name[state]);

switch (state) {
case HST_PROBE_START:
new_state = HST_ALLOC_BUF;
reschedule = 1;
break;

case HST_ALLOC_BUF:
rc = carm_send_special(host, carm_fill_alloc_buf);
if (rc) {
new_state = HST_ERROR;
reschedule = 1;
}
break;

case HST_SYNC_TIME:
rc = carm_send_special(host, carm_fill_sync_time);
if (rc) {
new_state = HST_ERROR;
reschedule = 1;
}
break;

case HST_GET_FW_VER:
rc = carm_send_special(host, carm_fill_get_fw_ver);
if (rc) {
new_state = HST_ERROR;
reschedule = 1;
}
break;

case HST_PORT_SCAN:
rc = carm_send_special(host, carm_fill_scan_channels);
if (rc) {
new_state = HST_ERROR;
reschedule = 1;
}
break;

case HST_DEV_SCAN_START:
host->cur_scan_dev = -1;
new_state = HST_DEV_SCAN;
reschedule = 1;
break;

case HST_DEV_SCAN:
next_dev = -1;
for (i = host->cur_scan_dev + 1; i < CARM_MAX_PORTS; i++)
if (host->dev_present & (1 << i)) {
next_dev = i;
break;
}

if (next_dev >= 0) {
host->cur_scan_dev = next_dev;
rc = carm_array_info(host, next_dev);
if (rc) {
new_state = HST_ERROR;
reschedule = 1;
}
} else {
new_state = HST_DEV_ACTIVATE;
reschedule = 1;
}
break;

case HST_DEV_ACTIVATE: {
int activated = 0;
for (i = 0; i < CARM_MAX_PORTS; i++)
if (host->dev_active & (1 << i)) {
struct carm_port *port = &host->port[i];
struct gendisk *disk = port->disk;

set_capacity(disk, port->capacity);
add_disk(disk);
activated++;
}

printk(KERN_INFO DRV_NAME "(%s): %d ports activated\n",
pci_name(host->pdev), activated);

new_state = HST_PROBE_FINISHED;
reschedule = 1;
break;
}

case HST_PROBE_FINISHED:
up(&host->probe_sem);
break;

case HST_ERROR:
/* FIXME: TODO */
break;

default:
/* should never occur */
printk(KERN_ERR PFX "BUG: unknown state %d\n", state);
assert(0);
break;
}

if (new_state != HST_INVALID) {
spin_lock_irqsave(&host->lock, flags);
host->state = new_state;
spin_unlock_irqrestore(&host->lock, flags);
}
if (reschedule)
schedule_work(&host->fsm_task);
}

/*
 * Poll CARM_LMUC until the bits in @bits are all set (@test_bit non-zero)
 * or all clear (@test_bit zero).  Polls up to 50000 times with a 100 us
 * delay each.  Returns 0 when the condition is met, -EBUSY on timeout.
 */
static int carm_init_wait(void *mmio, u32 bits, unsigned int test_bit)
{
	const u32 wanted = test_bit ? bits : 0;
	unsigned int polls = 0;

	while (polls < 50000) {
		u32 lmuc = readl(mmio + CARM_LMUC);

		udelay(100);

		if ((lmuc & bits) == wanted)
			return 0;

		cond_resched();
		polls++;
	}

	printk(KERN_ERR PFX "carm_init_wait timeout, bits == 0x%x, test_bit == %s\n",
	       bits, test_bit ? "yes" : "no");
	return -EBUSY;
}

static void carm_init_responses(struct carm_host *host)
{
void *mmio = host->mmio;
unsigned int i;
struct carm_response *resp = (struct carm_response *) host->shm;

for (i = 0; i < RMSG_Q_LEN; i++)
resp[i].status = 0xffffffff;

writel(0, mmio + CARM_RESP_IDX);
}

/*
 * Bring the adapter into operating state and start the probe state
 * machine.
 *
 * Masks interrupts, clears bit 0 of CARM_INITC if set (followed by a
 * 5 second sleep), cycles the CARM_CME/CARM_RME enable bits in CARM_HMUC,
 * programs the message sizes and response buffer address/length, sets the
 * default interrupt mask, resets the response ring and finally schedules
 * the state machine from HST_PROBE_START.
 *
 * Returns 0 on success or the error from carm_init_wait() on timeout.
 * Sleeps, so it must be called from process context.
 */
static int carm_init_host(struct carm_host *host)
{
	void *mmio = host->mmio;
	u32 tmp;
	u8 tmp8;
	int rc;

	DPRINTK("ENTER\n");

	writel(0, mmio + CARM_INT_MASK);

	tmp8 = readb(mmio + CARM_INITC);
	if (tmp8 & 0x01) {
		tmp8 &= ~0x01;
		/* the register lives at mmio + CARM_INITC, not at the bare offset */
		writeb(tmp8, mmio + CARM_INITC);
		readb(mmio + CARM_INITC); /* flush */

		DPRINTK("snooze...\n");
		msleep(5000);
	}

	tmp = readl(mmio + CARM_HMUC);
	if (tmp & CARM_CME) {
		DPRINTK("CME bit present, waiting\n");
		rc = carm_init_wait(mmio, CARM_CME, 1);
		if (rc) {
			DPRINTK("EXIT, carm_init_wait 1 failed\n");
			return rc;
		}
	}
	if (tmp & CARM_RME) {
		DPRINTK("RME bit present, waiting\n");
		rc = carm_init_wait(mmio, CARM_RME, 1);
		if (rc) {
			DPRINTK("EXIT, carm_init_wait 2 failed\n");
			return rc;
		}
	}

	/* disable both engines and wait for that to take effect */
	tmp &= ~(CARM_RME | CARM_CME);
	writel(tmp, mmio + CARM_HMUC);
	readl(mmio + CARM_HMUC); /* flush */

	rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 0);
	if (rc) {
		DPRINTK("EXIT, carm_init_wait 3 failed\n");
		return rc;
	}

	/* the four message sizes, one register each */
	writel(32, mmio + CARM_CMS0);
	writel(64, mmio + CARM_CMS0 + (4 * 1));
	writel(128, mmio + CARM_CMS0 + (4 * 2));
	writel(CARM_MSG_SIZE, mmio + CARM_CMS0 + (4 * 3));

	/* response buffer location and size */
	writel(host->shm_dma & 0xffffffff, mmio + RBUF_ADDR_LO);
	writel((host->shm_dma >> 16) >> 16, mmio + RBUF_ADDR_HI);
	writel(RBUF_LEN, mmio + RBUF_BYTE_SZ);

	/* re-enable and wait for the enable bits to show up */
	tmp = readl(mmio + CARM_HMUC);
	tmp |= (CARM_RME | CARM_CME | CARM_WZBC);
	writel(tmp, mmio + CARM_HMUC);
	readl(mmio + CARM_HMUC); /* flush */

	rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 1);
	if (rc) {
		DPRINTK("EXIT, carm_init_wait 4 failed\n");
		return rc;
	}

	writel(0, mmio + CARM_HMPHA);
	writel(INT_DEF_MASK, mmio + CARM_INT_MASK);

	carm_init_responses(host);

	/* start initialization, probing state machine */
	spin_lock_irq(&host->lock);
	assert(host->state == HST_INVALID);
	host->state = HST_PROBE_START;
	spin_unlock_irq(&host->lock);
	schedule_work(&host->fsm_task);

	DPRINTK("EXIT\n");
	return 0;
}

/*
 * Allocate a gendisk and a request queue for every port of @host.
 *
 * Returns 0 on success or -ENOMEM at the first allocation failure.  On
 * failure, whatever was allocated so far stays attached to the ports; the
 * caller releases it with carm_free_disks().
 */
static int carm_init_disks(struct carm_host *host)
{
	unsigned int port_no;

	for (port_no = 0; port_no < CARM_MAX_PORTS; port_no++) {
		struct carm_port *port = &host->port[port_no];
		struct gendisk *disk;
		request_queue_t *queue;

		port->host = host;
		port->port_no = port_no;

		disk = alloc_disk(CARM_MINORS_PER_MAJOR);
		if (!disk)
			return -ENOMEM;

		port->disk = disk;
		sprintf(disk->disk_name, DRV_NAME "%u_%u", carm_host_id, port_no);
		sprintf(disk->devfs_name, DRV_NAME "/%u_%u", carm_host_id, port_no);
		disk->major = host->major;
		disk->first_minor = port_no * CARM_MINORS_PER_MAJOR;
		disk->fops = &carm_bd_ops;
		disk->private_data = port;

		/* all port queues share the host lock */
		queue = blk_init_queue(carm_rq_fn, &host->lock);
		if (!queue)
			return -ENOMEM;

		disk->queue = queue;
		blk_queue_max_hw_segments(queue, CARM_MAX_SG);
		blk_queue_max_phys_segments(queue, CARM_MAX_SG);

		queue->queuedata = port;
	}

	return 0;
}

static void carm_free_disks(struct carm_host *host)
{
unsigned int i;

for (i = 0; i < CARM_MAX_PORTS; i++) {
struct gendisk *disk = host->port[i].disk;
if (disk) {
request_queue_t *q = disk->queue;
if (q)
blk_cleanup_queue(q);
put_disk(disk);
}
}
}

/*
 * Allocate the DMA-coherent shared memory area and lay it out: the
 * response ring occupies the first RBUF_LEN bytes (filled with 0xff,
 * i.e. "empty"), and message slots follow (zeroed).
 * Returns 0 on success, -ENOMEM if the allocation fails.
 */
static int carm_init_shm(struct carm_host *host)
{
	void *area;

	area = pci_alloc_consistent(host->pdev, CARM_SHM_SIZE,
				    &host->shm_dma);
	host->shm = area;
	if (!area)
		return -ENOMEM;

	host->msg_base = area + RBUF_LEN;
	host->msg_dma = host->shm_dma + RBUF_LEN;

	memset(area, 0xff, RBUF_LEN);
	memset(host->msg_base, 0, PDC_SHM_SIZE - RBUF_LEN);

	return 0;
}

/*
 * PCI probe entry point: set up one adapter.
 *
 * Enables the device, claims its regions, sets the DMA mask, allocates
 * the host structure, maps MMIO, allocates shared memory, creates the OOB
 * queue, registers a block major, allocates the per-port disks, hooks the
 * interrupt and starts the adapter.  It then blocks on probe_sem until
 * the probe state machine reaches HST_PROBE_FINISHED.
 *
 * Returns 0 on success or a negative errno; on failure everything
 * acquired so far is released in reverse order through the err_out_*
 * labels.
 *
 * NOTE(review): the state machine does not release probe_sem from
 * HST_ERROR, so a failed probe appears to leave this function blocked in
 * down() -- confirm.
 */
static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
{
static unsigned int printed_version;
struct carm_host *host;
unsigned int pci_dac;
int rc;
request_queue_t *q;
unsigned int i;

/* print the version banner once, for the first device only */
if (!printed_version++)
printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");

rc = pci_enable_device(pdev);
if (rc)
return rc;

rc = pci_request_regions(pdev, DRV_NAME);
if (rc)
goto err_out;

/*
 * The 64-bit DMA attempt is only compiled when IF_64BIT_DMA_IS_POSSIBLE
 * is defined non-zero; otherwise only the 32-bit mask below is used.
 * Note the "else {" opened here is closed inside the second #if.
 */
#if IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */
rc = pci_set_dma_mask(pdev, 0xffffffffffffffffULL);
if (!rc) {
rc = pci_set_consistent_dma_mask(pdev, 0xffffffffffffffffULL);
if (rc) {
printk(KERN_ERR DRV_NAME "(%s): consistent DMA mask failure\n",
pci_name(pdev));
goto err_out_regions;
}
pci_dac = 1;
} else {
#endif
rc = pci_set_dma_mask(pdev, 0xffffffffULL);
if (rc) {
printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n",
pci_name(pdev));
goto err_out_regions;
}
pci_dac = 0;
#if IF_64BIT_DMA_IS_POSSIBLE /* grrrr... */
}
#endif

host = kmalloc(sizeof(*host), GFP_KERNEL);
if (!host) {
printk(KERN_ERR DRV_NAME "(%s): memory alloc failure\n",
pci_name(pdev));
rc = -ENOMEM;
goto err_out_regions;
}

/* zeroing also leaves host->state at HST_INVALID (0) */
memset(host, 0, sizeof(*host));
host->pdev = pdev;
host->flags = pci_dac ? FL_DAC : 0;
spin_lock_init(&host->lock);
INIT_WORK(&host->fsm_task, carm_fsm_task, host);
init_MUTEX_LOCKED(&host->probe_sem);

/* each request's tag is its index in req[] */
for (i = 0; i < ARRAY_SIZE(host->req); i++)
host->req[i].tag = i;

host->mmio = ioremap(pci_resource_start(pdev, 0),
pci_resource_len(pdev, 0));
if (!host->mmio) {
printk(KERN_ERR DRV_NAME "(%s): MMIO alloc failure\n",
pci_name(pdev));
rc = -ENOMEM;
goto err_out_kfree;
}

rc = carm_init_shm(host);
if (rc) {
printk(KERN_ERR DRV_NAME "(%s): DMA SHM alloc failure\n",
pci_name(pdev));
goto err_out_iounmap;
}

/* queue for driver-internal messages; shares the host lock */
q = blk_init_queue(carm_oob_rq_fn, &host->lock);
if (!q) {
printk(KERN_ERR DRV_NAME "(%s): OOB queue alloc failure\n",
pci_name(pdev));
rc = -ENOMEM;
goto err_out_pci_free;
}
host->oob_q = q;
q->queuedata = host;

/* major 0 requests a dynamically assigned major number */
rc = register_blkdev(0, DRV_NAME);
if (rc < 0)
goto err_out_free_oob;
host->major = rc;

devfs_mk_dir(DRV_NAME);

rc = carm_init_disks(host);
if (rc)
goto err_out_blkdev_disks;

pci_set_master(pdev);

rc = request_irq(pdev->irq, carm_interrupt, SA_SHIRQ, DRV_NAME, host);
if (rc) {
printk(KERN_ERR DRV_NAME "(%s): irq alloc failure\n",
pci_name(pdev));
goto err_out_blkdev_disks;
}

rc = carm_init_host(host);
if (rc)
goto err_out_free_irq;

/* block until carm_fsm_task() reports HST_PROBE_FINISHED */
DPRINTK("waiting for probe_sem\n");
down(&host->probe_sem);

/* TODO: wait for probing to end */

printk(KERN_ERR DRV_NAME "(%s): registered host, %d ports, mmio %lx\n",
pci_name(pdev), (int) CARM_MAX_PORTS,
pci_resource_start(pdev, 0));
carm_host_id++;
pci_set_drvdata(pdev, host);
return 0;

/* error unwinding: each label releases one resource, newest first */
err_out_free_irq:
free_irq(pdev->irq, host);
err_out_blkdev_disks:
carm_free_disks(host);
unregister_blkdev(host->major, DRV_NAME);
err_out_free_oob:
blk_cleanup_queue(host->oob_q);
err_out_pci_free:
pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma);
err_out_iounmap:
iounmap(host->mmio);
err_out_kfree:
kfree(host);
err_out_regions:
pci_release_regions(pdev);
err_out:
pci_disable_device(pdev);
return rc;
}

static void carm_remove_one (struct pci_dev *pdev)
{
struct carm_host *host = pci_get_drvdata(pdev);

if (!host) {
printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n",
pci_name(pdev));
return;
}

free_irq(pdev->irq, host);
carm_free_disks(host);
devfs_remove(DRV_NAME);
unregister_blkdev(host->major, DRV_NAME);
blk_cleanup_queue(host->oob_q);
pci_free_consistent(pdev, CARM_SHM_SIZE, host->shm, host->shm_dma);
iounmap(host->mmio);
kfree(host);
pci_release_regions(pdev);
pci_disable_device(pdev);
}

/* Module load: register the PCI driver; returns the registration result. */
static int __init carm_init(void)
{
	int rc = pci_module_init(&carm_driver);

	return rc;
}

/* Module unload: unregister the PCI driver. */
static void __exit carm_exit(void)
{
	pci_unregister_driver(&carm_driver);
}

module_init(carm_init);
module_exit(carm_exit);