--- linux-2.4.22/Documentation/Configure.help	2003-08-25 04:44:39.000000000 -0700
+++ linux-2.4.22-isw/Documentation/Configure.help	2003-09-24 12:22:34.000000000 -0700
@@ -2018,6 +2018,19 @@
   If you choose to compile this as a module, the module will be called
   hptraid.o.
 
+Intel Software RAID
+CONFIG_BLK_DEV_ATARAID_ISW
+  ******* Experimental Driver *********
+  This option enables support for the Intel Software RAID format.
+  Say Y or M if you have hardware which supports this format.
+  Currently Intel's ICH5R chipset uses this format.
+  This driver uses /dev/ataraid/dXpY (X and Y numbers) as device
+  names.
+
+  If you choose to compile this as a module, the module will be called
+  iswraid.o.
+
+
 Support for Acer PICA 1 chipset
 CONFIG_ACER_PICA_61
   This is a machine with a R4400 133/150 MHz CPU. To compile a Linux
--- linux-2.4.22/drivers/ide/Config.in	2003-08-25 04:44:41.000000000 -0700
+++ linux-2.4.22-isw/drivers/ide/Config.in	2003-09-24 12:22:35.000000000 -0700
@@ -220,5 +220,6 @@
 dep_tristate '   Support Promise software RAID (Fasttrak(tm)) (EXPERIMENTAL)' CONFIG_BLK_DEV_ATARAID_PDC $CONFIG_BLK_DEV_IDE $CONFIG_EXPERIMENTAL $CONFIG_BLK_DEV_ATARAID
 dep_tristate '   Highpoint 370 software RAID (EXPERIMENTAL)' CONFIG_BLK_DEV_ATARAID_HPT $CONFIG_BLK_DEV_IDE $CONFIG_EXPERIMENTAL $CONFIG_BLK_DEV_ATARAID
 dep_tristate '   Silicon Image Medley software RAID (EXPERIMENTAL)' CONFIG_BLK_DEV_ATARAID_SII $CONFIG_BLK_DEV_IDE $CONFIG_EXPERIMENTAL $CONFIG_BLK_DEV_ATARAID
+dep_tristate '   Support for Intel software RAID (EXPERIMENTAL)' CONFIG_BLK_DEV_ATARAID_ISW $CONFIG_BLK_DEV_IDE $CONFIG_EXPERIMENTAL $CONFIG_BLK_DEV_SD $CONFIG_BLK_DEV_ATARAID
 
 endmenu
diff -uNr linux-2.4.22/drivers/ide/raid/iswraid.c linux-2.4.22-isw/drivers/ide/raid/iswraid.c
--- linux-2.4.22/drivers/ide/raid/iswraid.c	1969-12-31 17:00:00.000000000 -0700
+++ linux-2.4.22-isw/drivers/ide/raid/iswraid.c	2003-09-24 16:12:03.000000000 -0700
@@ -0,0 +1,1175 @@
+/*
+ * iswraid.c Copyright (C) 2003,2004 Intel Corporation.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ * Authors: Boji Tony Kannanthanam
+ *          < boji dot t dot kannanthanam at intel dot com>
+ * Based on ataraid codebase by Arjan van de Ven
+ *
+ *
+ */
+
+/*
+ * "iswraid" is an ataraid subdriver for Intel's ICH5R chipset.
+ * The "ataraid" module needs to be loaded before this driver
+ * can load.
+ * This subdriver differs from the other ataraid subdriver in that
+ * it depends on a SCSI low level driver to access RAID member
+ * disks instead of the ATA/IDE subsystem.
+ * Hence, you will also need the "ata_piix" driver which detects
+ * the SATA drives connected to ICH5R as SCSI devices (sda, sdb...)
+ */
+/*
+ * NOTE(review): the names on the <...> include lines below were missing
+ * from the copy of this patch under review.  They are reconstructed from
+ * what the code uses and from the sibling ataraid subdrivers - confirm
+ * against the original submission.
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/smp_lock.h>
+#include <linux/blkdev.h>
+#include <linux/blkpg.h>
+#include <linux/genhd.h>
+#include <linux/ioctl.h>
+#include <linux/slab.h>
+
+#include <linux/ide.h>
+#include <asm/uaccess.h>
+
+#include "../../scsi/scsi.h"
+#include <scsi/scsi_ioctl.h>
+#include "ataraid.h"
+#include "iswraid.h"
+
+//#define DRIVERDEBUG = 1
+
+#ifdef DRIVERDEBUG
+#define DEBUG(s, args...) printk(s, ## args)
+#else
+#define DEBUG(s, args...)
+#endif
+
+#define ICH_VERSION_STRING "Version 0.0.6"
+
+static int iswraid_open(struct inode *inode, struct file *filp);
+static int iswraid_release(struct inode *inode, struct file *filp);
+static int iswraid_ioctl(struct inode *inode, struct file *file,
+			 unsigned int cmd, unsigned long arg);
+static int iswraid0_make_request(request_queue_t * q, int rw,
+				 struct buffer_head *bh);
+static int iswraid1_make_request(request_queue_t * q, int rw,
+				 struct buffer_head *bh);
+
+struct disk_dev {
+	int major;
+	int minor;
+	int device;		/* ataraid slot that claimed the disk, -1 if none */
+	struct disk_dev *next;
+};
+
+struct disk_dev *devlist = NULL;
+
+struct ichdisk {
+	kdev_t device;
+	unsigned long sectors;
+	struct block_device *bdev;
+	unsigned long last_pos;	/* sector just past the last RAID1 request */
+};
+
+#define MAX_RAID_MEMBER_DISKS 8
+#define MAX_RAID_VOLUMES 16
+#define ICH_SERIAL_BUF_LEN 32	/* size of the buffer iswraid_do_inquiry() fills */
+
+struct iswraid {
+	unsigned int stride;
+	unsigned int disks;
+	unsigned long sectors;
+	struct geom geom;
+
+	struct ichdisk disk[MAX_RAID_MEMBER_DISKS];
+
+	unsigned long cutoff[MAX_RAID_MEMBER_DISKS];
+	unsigned int cutoff_disks[MAX_RAID_MEMBER_DISKS];
+	int refcnt;		/* open count, protected by iswraid_sem */
+};
+
+static struct raid_device_operations iswraid0_ops = {
+	open:iswraid_open,
+	release:iswraid_release,
+	ioctl:iswraid_ioctl,
+	make_request:iswraid0_make_request
+};
+
+static struct raid_device_operations iswraid1_ops = {
+	open:iswraid_open,
+	release:iswraid_release,
+	ioctl:iswraid_ioctl,
+	make_request:iswraid1_make_request
+};
+
+static struct iswraid raid[MAX_RAID_VOLUMES];
+
+static DECLARE_MUTEX(iswraid_sem);
+
+/*
+ * Block-device ioctl handler shared by the RAID0 and RAID1 personalities.
+ * Answers size and geometry queries from raid[] and the ataraid gendisk,
+ * re-reads the partition table on BLKRRPART and passes every other
+ * command to blk_ioctl().  Returns 0 or a negative errno.
+ */
+static int
+iswraid_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+	      unsigned long arg)
+{
+	unsigned int minor;
+	unsigned long sectors;
+
+	if (!inode || !inode->i_rdev)
+		return -EINVAL;
+
+	minor = MINOR(inode->i_rdev) >> SHIFT;
+
+	switch (cmd) {
+
+	case BLKGETSIZE:	/* Return device size */
+		{
+			if (!arg)
+				return -EINVAL;
+			sectors =
+			    ataraid_gendisk.part[MINOR(inode->i_rdev)].nr_sects;
+			if (MINOR(inode->i_rdev) & 15)
+				return put_user(sectors, (unsigned long *) arg);
+			return put_user(raid[minor].sectors,
+					(unsigned long *) arg);
+			break;
+		}
+
+	case HDIO_GETGEO:
+		{
+			struct hd_geometry *loc = (struct hd_geometry *) arg;
+			unsigned short bios_cyl = raid[minor].geom.cylinders;	/* truncate */
+
+			if (!loc)
+				return -EINVAL;
+			if (put_user
+			    (raid[minor].geom.heads, (byte *) & loc->heads))
+				return -EFAULT;
+			if (put_user
+			    (raid[minor].geom.sectors, (byte *) & loc->sectors))
+				return -EFAULT;
+			if (put_user
+			    (bios_cyl, (unsigned short *) &loc->cylinders))
+				return -EFAULT;
+			if (put_user
+			    ((unsigned) ataraid_gendisk.
+			     part[MINOR(inode->i_rdev)].start_sect,
+			     (unsigned long *) &loc->start))
+				return -EFAULT;
+			return 0;
+		}
+
+	case HDIO_GETGEO_BIG:
+		{
+			struct hd_big_geometry *loc =
+			    (struct hd_big_geometry *) arg;
+			if (!loc)
+				return -EINVAL;
+			if (put_user
+			    (raid[minor].geom.heads, (byte *) & loc->heads))
+				return -EFAULT;
+			if (put_user
+			    (raid[minor].geom.sectors, (byte *) & loc->sectors))
+				return -EFAULT;
+			if (put_user
+			    (raid[minor].geom.cylinders,
+			     (unsigned int *) &loc->cylinders))
+				return -EFAULT;
+			if (put_user
+			    ((unsigned) ataraid_gendisk.
+			     part[MINOR(inode->i_rdev)].start_sect,
+			     (unsigned long *) &loc->start))
+				return -EFAULT;
+			return 0;
+		}
+
+	case BLKRRPART:
+		{
+			int i;
+			/* Re-Read Partition Table. */
+			DEBUG("iswraid: BLKRRPART. \n");
+			if (!capable(CAP_SYS_ADMIN))
+				return -EACCES;
+
+			down(&iswraid_sem);
+			if (raid[minor].refcnt > 1) {
+				up(&iswraid_sem);
+				return -EBUSY;
+			}
+
+			for (i = 0; i < 16; i++) {
+				int m = (minor << SHIFT) + i;	/* 'minor' is the volume index here */
+				if (ataraid_gendisk.part[m].nr_sects > 0) {
+					invalidate_device(MKDEV
+							  (ataraid_gendisk.
+							   major, m), 1);
+				}
+				/* Clear existing partition sizes. */
+				ataraid_gendisk.part[m].start_sect = 0;
+				ataraid_gendisk.part[m].nr_sects = 0;
+				/* Reset the Block Size so that the partition table can be read. */
+				set_blocksize(MKDEV(ataraid_gendisk.major, m),
+					      ICH_DISK_BLOCK_SIZE);
+			}
+			ataraid_register_disk(minor, raid[minor].sectors);
+			up(&iswraid_sem);
+			return 0;
+		}
+	default:
+		return blk_ioctl(inode->i_rdev, cmd, arg);
+	};
+
+	return 0;
+}
+
+/* Translate a partition-relative sector into a volume-relative one. */
+static unsigned long
+partition_map_normal(unsigned long block, unsigned long partition_off,
+		     unsigned long partition_size, int stride)
+{
+	return block + partition_off;
+}
+
+/*
+ * RAID0 make_request hook: redirect bh to the member disk and sector that
+ * hold the requested data.  Returns 1 so that the block layer submits the
+ * remapped bh, or -1 if the request would cross a stride boundary.
+ */
+static int
+iswraid0_make_request(request_queue_t * q, int rw, struct buffer_head *bh)
+{
+
+	unsigned long rsect;
+	unsigned long rsect_left, rsect_accum = 0;
+	unsigned long block;
+	unsigned int disk = 0, real_disk = 0;
+	int i;
+	int device;
+	struct iswraid *thisraid;
+
+	DEBUG("iswraid: iswraid0_make_request\n");
+	rsect = bh->b_rsector;
+
+	/* Ok. We need to modify this sector number to a new disk + new sector number.
+	 * If there are disks of different sizes, this gets tricky.
+	 * Example with 3 disks (1Gb, 4Gb and 5 GB):
+	 * The first 3 Gb of the "RAID" are evenly spread over the 3 disks.
+	 * Then things get interesting. The next 2Gb (RAID view) are spread across disk 2 and 3
+	 * and the last 1Gb is disk 3 only.
+	 *
+	 * the way this is solved is like this: We have a list of "cutoff" points where everytime
+	 * a disk falls out of the "higher" count, we mark the max sector. So once we pass a cutoff
+	 * point, we have to divide by one less.
+	 */
+
+	device = (bh->b_rdev >> SHIFT) & MAJOR_MASK;
+	thisraid = &raid[device];
+	if (thisraid->stride == 0)
+		thisraid->stride = 1;
+
+	/* Partitions need adding of the start sector of the partition to the requested sector */
+
+	rsect =
+	    partition_map_normal(rsect,
+				 ataraid_gendisk.part[MINOR(bh->b_rdev)].
+				 start_sect,
+				 ataraid_gendisk.part[MINOR(bh->b_rdev)].
+				 nr_sects, thisraid->stride);
+
+	/* Woops we need to split the request to avoid crossing a stride barrier */
+	if ((rsect / thisraid->stride) !=
+	    ((rsect + (bh->b_size / 512) - 1) / thisraid->stride)) {
+		return -1;
+	}
+
+	rsect_left = rsect;
+
+	for (i = 0; i < MAX_RAID_MEMBER_DISKS; i++) {
+		if (thisraid->cutoff_disks[i] == 0)
+			break;
+		if (rsect > thisraid->cutoff[i]) {
+			/* we're in the wrong area so far */
+			rsect_left -= thisraid->cutoff[i];
+			rsect_accum +=
+			    thisraid->cutoff[i] / thisraid->cutoff_disks[i];
+		} else {
+			block = rsect_left / thisraid->stride;
+			disk = block % thisraid->cutoff_disks[i];
+			block =
+			    (block / thisraid->cutoff_disks[i]) *
+			    thisraid->stride;
+			rsect =
+			    rsect_accum + (rsect_left % thisraid->stride) +
+			    block;
+			break;
+		}
+	}
+
+	for (i = 0; i < MAX_RAID_MEMBER_DISKS; i++) {
+		if ((disk == 0) && (thisraid->disk[i].sectors > rsect_accum)) {
+			real_disk = i;
+			break;
+		}
+		if ((disk > 0) && (thisraid->disk[i].sectors >= rsect_accum)) {
+			disk--;
+		}
+	}
+	disk = real_disk;
+	/*
+	 * The new BH_Lock semantics in ll_rw_blk.c guarantee that this
+	 * is the only IO operation happening on this bh.
+	 */
+	bh->b_rdev = thisraid->disk[disk].device;
+	bh->b_rsector = rsect;
+
+	/*
+	 * Let the main block layer submit the IO and resolve recursion:
+	 */
+	return 1;
+
+}
+
+/*
+ * RAID1 write: duplicate bh once per member disk and submit each copy with
+ * ataraid_end_request() as its completion handler.  Always returns 0.
+ */
+static int
+iswraid1_write_request(request_queue_t * q, int rw, struct buffer_head *bh)
+{
+	struct buffer_head *bh1;
+	struct ataraid_bh_private *private;
+	int device;
+	int i;
+
+	DEBUG("iswraid: iswraid1_write_request\n");
+
+	device = (bh->b_rdev >> SHIFT) & MAJOR_MASK;
+	private = ataraid_get_private();
+	if (private == NULL)
+		BUG();
+
+	private->parent = bh;
+
+	atomic_set(&private->count, raid[device].disks);
+
+	for (i = 0; i < raid[device].disks; i++) {
+		bh1 = ataraid_get_bhead();
+		/* If this ever fails we're doomed */
+		if (!bh1)
+			BUG();
+
+		/* dupe the bufferhead and update the parts that need to be different */
+		memcpy(bh1, bh, sizeof (*bh));
+
+		bh1->b_end_io = ataraid_end_request;
+		bh1->b_private = private;
+		bh1->b_rsector += ataraid_gendisk.part[MINOR(bh->b_rdev)].start_sect;	/* partition offset */
+		bh1->b_rdev = raid[device].disk[i].device;
+
+		/* update the last known head position for the drive */
+		raid[device].disk[i].last_pos =
+		    bh1->b_rsector + (bh1->b_size >> 9);
+
+		generic_make_request(rw, bh1);
+	}
+	return 0;
+}
+
+/*
+ * RAID1 read: redirect bh to the member whose last known head position is
+ * nearest the requested sector (distance capped at 4095).  Returns 1.
+ */
+static int
+iswraid1_read_request(request_queue_t * q, int rw, struct buffer_head *bh)
+{
+	int device;
+	int dist;
+	int bestsofar, bestdist, i;
+	static int previous;
+
+	DEBUG("iswraid: iswraid1_read_request\n");
+
+	/* Reads are simple in principle. Pick a disk and go.
+	 * Initially I cheat by just picking the one which the last known
+	 * head position is closest by.
+	 * Later on, online/offline checking and performance needs adding
+	 */
+
+	device = (bh->b_rdev >> SHIFT) & MAJOR_MASK;
+	bh->b_rsector += ataraid_gendisk.part[MINOR(bh->b_rdev)].start_sect;
+
+	bestsofar = 0;
+	bestdist = raid[device].disk[0].last_pos - bh->b_rsector;
+	if (bestdist < 0)
+		bestdist = -bestdist;
+	if (bestdist > 4095)
+		bestdist = 4095;
+
+	for (i = 1; i < raid[device].disks; i++) {
+		dist = raid[device].disk[i].last_pos - bh->b_rsector;
+		if (dist < 0)
+			dist = -dist;
+		if (dist > 4095)
+			dist = 4095;
+
+		if (bestdist == dist) {	/* it's a tie; try to do some read balancing */
+			if ((previous > bestsofar) && (previous <= i))
+				bestsofar = i;
+			previous = (previous + 1) % raid[device].disks;
+		} else if (bestdist > dist) {
+			bestdist = dist;
+			bestsofar = i;
+		}
+
+	}
+
+	bh->b_rdev = raid[device].disk[bestsofar].device;
+	raid[device].disk[bestsofar].last_pos =
+	    bh->b_rsector + (bh->b_size >> 9);
+
+	/*
+	 * Let the main block layer submit the IO and resolve recursion:
+	 */
+	return 1;
+}
+
+/* RAID1 make_request hook: dispatch to the read or the write path. */
+static int
+iswraid1_make_request(request_queue_t * q, int rw, struct buffer_head *bh)
+{
+	DEBUG("iswraid: iswraid1_make_request\n");
+
+	/* Read and Write are totally different cases; split them totally here */
+	if (rw == READA)
+		rw = READ;
+
+	if (rw == READ)
+		return iswraid1_read_request(q, rw, bh);
+	else
+		return iswraid1_write_request(q, rw, bh);
+
+	return 0;
+}
+
+/*
+ * Return the 512-byte sector, two sectors before the end of the disk, at
+ * which probedisk() looks for the metadata header; 0 if no gendisk exists.
+ */
+static unsigned long __init
+calc_ichblock_offset(int major, int minor)
+{
+	unsigned long lba = 0;
+	kdev_t dev;
+	struct gendisk *gdisk;
+
+	DEBUG("iswraid: calc_ichblock_offset\n");
+
+	dev = MKDEV(major, minor);
+	gdisk = get_gendisk(dev);
+
+	if (gdisk == NULL)
+		return 0;
+
+	/* gendisk->sizes are in 1024 sized blocks. Therefore
+	 * multiplying by 2 to get 512 sized blocks.
+	 */
+	lba = (gdisk->sizes[minor] * 2) - 2;
+	return lba;
+}
+
+/*
+ * Read the block at mpb_pos from (major, minor) and copy it into buffer,
+ * limited to bufsize bytes and to the size of the buffer head returned by
+ * bread().  Returns 0 on success, -EINVAL on failure.
+ */
+static int __init
+read_disk_sb(int major, int minor, unsigned char *buffer, int bufsize,
+	     unsigned long mpb_pos, int mpb_blocks)
+{
+	int ret = -EINVAL;
+	struct buffer_head *bh = NULL;
+	kdev_t dev = MKDEV(major, minor);
+
+	DEBUG("iswraid: read_disk_sb\n");
+
+	if (blksize_size[major] == NULL)	/* device doesn't exist */
+		return -EINVAL;
+
+	set_blocksize(dev, ICH_DISK_BLOCK_SIZE);
+
+	bh = bread(dev, mpb_pos, mpb_blocks * ICH_DISK_BLOCK_SIZE);
+
+	if (bh) {
+		/* never copy more than the buffer head actually holds */
+		if (bufsize > bh->b_size)
+			bufsize = bh->b_size;
+		memcpy(buffer, bh->b_data, bufsize);
+	} else {
+		printk(KERN_ERR "iswraid: Error reading superblock.\n");
+		goto abort;
+	}
+	ret = 0;
+      abort:
+	if (bh)
+		brelse(bh);
+	return ret;
+}
+
+/*
+ * Generate checksum of contents of Raid metadata
+ * for mpbSize/sizeof(U32) words
+ */
+static u32 __init
+compute_checksum(u32 * buffer, u32 mpbSize)
+{
+	u32 i;
+	u32 sum = 0;
+	DEBUG("iswraid: compute_checksum\n");
+	for (i = 0; i < (mpbSize / sizeof (u32)); i++)
+		sum += *buffer++;
+	DEBUG("iswraid: checksum calculated from metadata is%lu \n", sum);
+	return sum;
+}
+
+/*
+ * The raid member disks are scsi devices. We do an inquiry
+ * to determine the disk serial number. This information is used
+ * to order the member disks correctly in the raid array.
+ * ich_serial_no must have room for ICH_SERIAL_BUF_LEN bytes.
+ * Returns 0 on success and a negative value on failure.
+ */
+static int __init
+iswraid_do_inquiry(int major, int minor, unsigned char *ich_serial_no)
+{
+	kdev_t dev;
+	int i, retval;
+	unsigned char *cmd;
+	unsigned char *resppage;
+	unsigned char buffer[1024];
+	Scsi_Device *SDpnt;
+
+	DEBUG("iswraid: iswraid_do_inquiry\n");
+
+	if (blksize_size[major] == NULL)	/* device doesn't exist */
+		return -EINVAL;
+
+	dev = MKDEV(major, minor);
+
+	SDpnt = blk_dev[MAJOR(dev)].queue(dev)->queuedata;
+
+	if (SDpnt == NULL)
+		return -1;
+
+	for (i = 0; i < 1024; i++)
+		buffer[i] = 0;
+
+	/* input data size. No input. */
+	*((int *) buffer) = 0;
+	/* output buffer size.. */
+	*(((int *) buffer) + 1) = 1024;
+	/* cmd is the SCSI command to send */
+	cmd = (char *) (((int *) buffer) + 2);
+
+	cmd[0] = 0x12;		/* Opcode INQUIRY=12h */
+	cmd[1] = 0x01;		/* EVPD=1. Return the vital product data specified in page code */
+	cmd[2] = 0x80;		/* Page Code Unit serial number page=80h */
+	cmd[3] = 0x00;		/* Reserved byte */
+	cmd[4] = 0xff;		/* allocation length */
+	cmd[5] = 0x00;		/* Control byte */
+	retval = kernel_scsi_ioctl(SDpnt, SCSI_IOCTL_SEND_COMMAND, buffer);
+	if (retval) {
+		printk(KERN_INFO
+		       "iswraid: ERROR kernel_scsi_ioctl(SCSI_IOCTL_SEND_COMMAND) failed. code= %d\n",
+		       retval);
+		return -1;
+	}
+
+	/* VPD page is at location resppage */
+	resppage = buffer + 8;
+	/*
+	 * Byte 3 of the page is the page length.  Copy up to the first
+	 * blank, never past the caller's ICH_SERIAL_BUF_LEN-byte buffer,
+	 * and always NUL-terminate the result.
+	 */
+	for (i = 0; i < resppage[3] && i < ICH_SERIAL_BUF_LEN - 1; i++) {
+		if (resppage[4 + i] == ' ')
+			break;
+		ich_serial_no[i] = resppage[4 + i];
+	}
+	ich_serial_no[i] = '\0';
+	return 0;
+}
+
+/* Used to hold the RAID volume identifier and to distinguish
+ * between RAID arrays of same type.
+ */
+static u32 RaidID = 0;
+
+/*
+ * Probe one candidate disk for Intel RAID metadata.  When the disk holds a
+ * normal-state volume of the requested raidlevel that belongs to the family
+ * currently being assembled, record it as a member of raid[device] and mark
+ * rdisk as claimed.  Every early return leaves rdisk unclaimed.
+ */
+static void __init
+probedisk(struct disk_dev *rdisk, int device, int raidlevel)
+{
+	int i = 0;
+	struct _RaidMpb *mpb;
+	unsigned long sb_offset;
+	int sigLen = 0;
+	static unsigned char block[4096];
+	static unsigned char block2[2048];
+	struct block_device *bdev;
+	int major, minor;
+
+	DEBUG("iswraid: probedisk for RAID level %d \n", raidlevel);
+
+	/* already assigned to another array */
+	if (rdisk->device != -1)
+		return;
+
+	major = rdisk->major;
+	minor = rdisk->minor;
+
+	/*
+	 * Calculate the position of the block containing
+	 * the first block of raid metadata.
+	 */
+	sb_offset = calc_ichblock_offset(major, minor);
+
+	if (sb_offset == 0)
+		return;
+
+	/* Read the RAID metadata header */
+	if (read_disk_sb
+	    (major, minor, (unsigned char *) &block, sizeof (block), sb_offset,
+	     1))
+		return;
+
+	mpb = (struct _RaidMpb *) block;
+
+	/*
+	 * Check signature and version info: the signature text must match
+	 * exactly and the metadata version must not compare greater than
+	 * the newest version string this driver knows (MPB_VERSION_RAID1).
+	 */
+	unsigned char ich_raid_signature[MAX_SIGNATURE_LENGTH];
+	strncpy(ich_raid_signature, MPB_SIGNATURE, strlen(MPB_SIGNATURE) + 1);
+	strncat(ich_raid_signature, MPB_VERSION_RAID1,
+		strlen(MPB_VERSION_RAID1) + 1);
+	sigLen = strlen(ich_raid_signature);
+	if (strncmp(mpb->sig.text, MPB_SIGNATURE, strlen(MPB_SIGNATURE)) != 0 ||
+	    strncmp(mpb->sig.text, ich_raid_signature, sigLen) > 0)
+		return;
+
+	DEBUG("iswraid: Intel RAID Member disk found.\n");
+	DEBUG("iswraid: Major:%d Minor:%d Signature: %s \n", major, minor,
+	      mpb->sig.text);
+
+	/* never checksum or parse more than the block[] buffer can hold */
+	if (mpb->mpbSize > sizeof (block))
+		return;
+
+	//FIXME: The section below should read in all of MPB if > 512.
+	//Cannot test currently with ICH5R.
+	if (mpb->mpbSize > 512) {
+		/* OK now to read in the rest of the MPB. First from the
+		 * size lets figure out how many more sectors.
+		 */
+		DEBUG("iswraid: Size of RAID metadata is %li \n", mpb->mpbSize);
+		int TotalBlocks = (mpb->mpbSize) / ICH_DISK_BLOCK_SIZE;
+		if (TotalBlocks > 1) {
+			//For ICH5R we will most probably won't go here.
+			DEBUG
+			    ("iswraid: Total blocks: %d, read %d blocks starting from %li \n",
+			     TotalBlocks, TotalBlocks - 1,
+			     sb_offset - (TotalBlocks - 1));
+			if (read_disk_sb
+			    (major, minor, (unsigned char *) &block2,
+			     sizeof (block2), sb_offset - (TotalBlocks - 1),
+			     (TotalBlocks - 1))) {
+				DEBUG
+				    ("iswraid: failed to read rest of RAID metadata\n");
+				return;
+			}
+			memcpy(&block[ICH_DISK_BLOCK_SIZE], block2,
+			       sizeof (block2) - ICH_DISK_BLOCK_SIZE);
+		}
+	}
+
+	/* Compare checksum read from MPB with newly calculated value */
+
+	u32 checksum = mpb->checkSum;
+	mpb->checkSum = 0;
+	if (checksum != compute_checksum((u32 *) mpb, mpb->mpbSize)) {
+		printk("iswraid: ERROR: checksum did not match\n");
+		return;
+	}
+
+	/* raid[].disk[] has room for MAX_RAID_MEMBER_DISKS entries only */
+	if (mpb->numDisks > MAX_RAID_MEMBER_DISKS)
+		return;
+
+	DEBUG("iswraid: Found an Intel Raid volume: \n");
+	DEBUG("iswraid: Contains %d member disks\n", mpb->numDisks);
+
+	/* RaidID is used to track whether the disk belongs to the RAID array
+	 * we are currently detecting. Using FamilyNum.
+	 * FIXME: Robust checking, using generationNum in combination with
+	 * familyNum
+	 */
+	if (RaidID == 0)
+		RaidID = mpb->familyNum;
+	else {
+		if (RaidID != mpb->familyNum)
+			return;
+	}
+
+#ifdef DRIVERDEBUG
+	struct _MpbDisk *disk;
+	/* Go thru the RAID member disks information */
+	disk = (struct _MpbDisk *) (mpb->diskTbl);
+
+	for (i = 0; i < mpb->numDisks; i++) {
+		DEBUG("iswraid: Disk %d, Serial %s\n", i,
+		      mpb->diskTbl[i].serial.serial);
+		DEBUG("iswraid: Disk %d: total blocks %lu SCSI ID %d \n", i,
+		      disk->totalBlocks, disk->scsiId);
+		DEBUG("iswraid: Serial %s \n", disk->serial.serial);
+		disk =
+		    (struct _MpbDisk *) ((unsigned long) disk +
+					 (unsigned long) sizeof (struct
+								 _MpbDisk));
+	}
+#endif
+
+	/* Go thru the RAID DEV structure */
+	struct _MpbRaidDev *raiddev;
+	i = mpb->numDisks;
+	raiddev = (struct _MpbRaidDev *) (&mpb->diskTbl[i]);
+
+	DEBUG("iswraid: Name of the RAID volume %s \n", raiddev->serial.serial);
+	DEBUG("iswraid: RAID volume State %lu \n", raiddev->status);
+
+	/* Parse RAID metadata for relevant information */
+	struct _MpbRaidVol *vol;
+	vol = &raiddev->raidVol;
+
+#ifdef DRIVERDEBUG
+	if (vol->migrState != 0) {
+		DEBUG("iswraid: RAID Volume %s is in migrating state\n",
+		      raiddev->serial.serial);
+	}
+#endif
+
+	struct _MpbRaidMap *map;
+	map = &vol->loMap;
+
+	/* different raidlevel. Not now... later */
+	if (map->raidLevel != raidlevel)
+		return;
+
+	/* more members than raid[].disk[] can describe */
+	if (map->numMembers > MAX_RAID_MEMBER_DISKS)
+		return;
+
+	/* We do not attempt to do any kind of error recovery on
+	 * RAID array errors. Just do not claim arrays which are
+	 * in abnormal state. Use Option ROM or config utility to
+	 * fix for now.
+	 */
+	if (map->mapState != 0) {
+		printk(KERN_INFO "iswraid: RAID volume not in normal state.\n");
+		printk(KERN_INFO "iswraid: Ignoring this RAID volume.\n");
+		return;
+	}
+
+	/* OK, now we need to uniquely match this disk to one of the
+	 * RAID member disk (since order of the disks in the array is
+	 * important. Use Disk serial number for this purpose.
+	 */
+	static unsigned char ich_disk_serial[ICH_SERIAL_BUF_LEN];
+	if (iswraid_do_inquiry(major, minor, (unsigned char *) &ich_disk_serial)
+	    != 0) {
+		printk(KERN_INFO "iswraid: inquiry returned error\n");
+		return;
+	}
+
+	DEBUG
+	    ("iswraid: For disk with major:%d and minor %d, Serial number is %s\n",
+	     major, minor, ich_disk_serial);
+
+	for (i = 0; i < mpb->numDisks; i++) {
+		/* skip order-table entries that point outside diskTbl */
+		if (map->diskOrdTbl[i] >= mpb->numDisks)
+			continue;
+		if (strcmp
+		    (ich_disk_serial,
+		     mpb->diskTbl[map->diskOrdTbl[i]].serial.serial) == 0) {
+			DEBUG
+			    ("iswraid: This disk %s is the %d th disk in array\n",
+			     ich_disk_serial, i);
+			break;
+		}
+	}
+
+	if (i == mpb->numDisks)
+		return;
+
+	/* We have all the info. Lets fill in the ataraid structure */
+	bdev = bdget(MKDEV(major, minor));
+	if (bdev
+	    && blkdev_get(bdev, FMODE_READ | FMODE_WRITE, 0, BDEV_RAW) == 0)
+		raid[device].disk[i].bdev = bdev;
+
+	//FIXME: claim this device so that use cannot access RAID member disk.
+	DEBUG
+	    ("iswraid: blocksPerMember= %lu blocksPerStrip=%lu numDataStripes=%lu\n",
+	     map->blocksPerMember, map->blocksPerStrip, map->numDataStripes);
+	raid[device].disk[i].device = MKDEV(major, minor);
+	raid[device].disk[i].sectors = map->blocksPerMember;
+	raid[device].stride = map->blocksPerStrip;
+	raid[device].disks = map->numMembers;
+	raid[device].sectors = raiddev->numDataBlocksLo;
+	raid[device].geom.heads = 255;
+	raid[device].geom.sectors = 63;
+	raid[device].geom.cylinders =
+	    raid[device].sectors / raid[device].geom.heads /
+	    raid[device].geom.sectors;
+	DEBUG("iswraid: raid[%d].geom.cylinders %lu \n", device,
+	      raid[device].geom.cylinders);
+
+	rdisk->device = device;
+}
+
+/*
+ * Fill raid[device].cutoff[] and cutoff_disks[], the tables that
+ * iswraid0_make_request() walks to locate a sector on arrays whose member
+ * disks differ in size.
+ */
+static void __init
+fill_cutoff(int device)
+{
+	int i, j;
+	unsigned long smallest;
+	unsigned long bar;
+	int count;
+
+	bar = 0;
+	for (i = 0; i < MAX_RAID_MEMBER_DISKS; i++) {
+		smallest = ~0;
+		for (j = 0; j < MAX_RAID_MEMBER_DISKS; j++)
+			if ((raid[device].disk[j].sectors < smallest)
+			    && (raid[device].disk[j].sectors > bar))
+				smallest = raid[device].disk[j].sectors;
+		count = 0;
+		for (j = 0; j < MAX_RAID_MEMBER_DISKS; j++)
+			if (raid[device].disk[j].sectors >= smallest)
+				count++;
+
+		smallest = smallest * count;
+		bar = smallest;
+		raid[device].cutoff[i] = smallest;
+		raid[device].cutoff_disks[i] = count;
+	}
+}
+
+/*
+ * Try to assemble one volume of the given raidlevel into ataraid slot
+ * 'device' from the disks on devlist.  Returns 0 if an array was found,
+ * -ENODEV if a member is missing or no disk matched.
+ */
+static __init int
+iswraid_init_one(int device, int raidlevel)
+{
+	int i, count;
+
+	struct disk_dev *rdisk_p;
+	rdisk_p = devlist;
+	while (rdisk_p != NULL) {
+		DEBUG("iswraid: going to probe disk: major %d and %d \n",
+		      rdisk_p->major, rdisk_p->minor);
+		probedisk(rdisk_p, device, raidlevel);
+		rdisk_p = rdisk_p->next;
+	}
+
+	/* Check that all the RAID members were properly detected
+	 * There should be raid[device].disks detected and info
+	 * filled in the array. This way we can detect an array
+	 * with missing disk. Overkill ?
+	 */
+	for (i = 0; i < raid[device].disks; i++) {
+		if (raid[device].disk[i].bdev == NULL) {
+			printk(KERN_INFO
+			       "iswraid: ERROR: RAID array missing disk(s).\n");
+			printk(KERN_INFO
+			       "iswraid: ERROR: Not registering this RAID array.\n");
+			/* Clean up */
+			int j;
+			for (j = 0; j < MAX_RAID_MEMBER_DISKS; j++) {
+				struct block_device *bdev =
+				    raid[device].disk[j].bdev;
+				raid[device].disk[j].bdev = NULL;
+				if (bdev)
+					blkdev_put(bdev, BDEV_RAW);
+			}
+			return -ENODEV;
+		}
+	}
+
+	if (raidlevel == 0)
+		fill_cutoff(device);
+
+	/* Initialize the gendisk structure */
+
+	ataraid_register_disk(device, raid[device].sectors);
+
+	count = 0;
+
+	for (i = 0; i < MAX_RAID_MEMBER_DISKS; i++) {
+		if (raid[device].disk[i].device != 0) {
+			printk(KERN_INFO
+			       "iswraid: RAID member drive %i is %li Mb (%i / %i) \n",
+			       i, raid[device].disk[i].sectors / 2048,
+			       MAJOR(raid[device].disk[i].device),
+			       MINOR(raid[device].disk[i].device));
+			count++;
+		}
+	}
+
+	if (count) {
+		printk(KERN_INFO
+		       "iswraid: Raid%i array consists of %i drives. \n",
+		       raidlevel, count);
+		return 0;
+	} else {
+		return -ENODEV;
+	}
+}
+
+/*
+ * Build devlist with one entry per SCSI disk that can be opened, walking
+ * the whole-disk minors (0, 16, 32, ...) of SCSI_DISK0_MAJOR and the
+ * following SCSI disk majors and stopping at the first disk that cannot
+ * be opened.  Returns the number of entries added.
+ */
+static int __init
+detect_devlist(void)
+{
+
+	struct disk_dev *devlist_tmp = NULL;
+	struct block_device *bdev;
+	int major, minor;
+	int dev_count = 0;
+
+	DEBUG("iswraid: detect_devlist: looking for SCSI disks\n");
+
+	major = SCSI_DISK0_MAJOR;
+
+	minor = 0;
+	do {
+		/* This is really a corner case: checking for all possible
+		 * SCSI disks. What are the odds ?
+		 */
+		if (minor > 240) {
+			minor = 0;
+			if (major == SCSI_DISK7_MAJOR)
+				break;
+			if (major == SCSI_DISK0_MAJOR)
+				major = SCSI_DISK1_MAJOR;
+			else
+				major++;
+		}
+
+		bdev = bdget(MKDEV(major, minor));
+		/* Check if it is a valid device */
+		if (bdev
+		    && blkdev_get(bdev, FMODE_READ | FMODE_WRITE, 0,
+				  BDEV_RAW) == 0) {
+			/* a real physical device found, add to devlist */
+
+			/*
+			 * NOTE(review): the reference taken by blkdev_get()
+			 * above is not dropped anywhere in this file for
+			 * the disks recorded here - confirm it is intended.
+			 */
+			devlist_tmp =
+			    kmalloc(sizeof (struct disk_dev), GFP_KERNEL);
+			if (devlist_tmp == NULL) {
+				blkdev_put(bdev, BDEV_RAW);
+				break;
+			}
+			devlist_tmp->major = major;
+			devlist_tmp->minor = minor;
+			DEBUG
+			    ("iswraid: detect_devlist: Found possible device: major %d and minor %d \n",
+			     major, minor);
+			devlist_tmp->device = -1;
+			devlist_tmp->next = devlist;
+			devlist = devlist_tmp;
+			dev_count++;
+		} else {
+			DEBUG("iswraid: found %d SCSI Devices to probe \n",
+			      dev_count);
+			/* SCSI disks are claimed sequentially. So we can stop searching when
+			 * we encounter the first invalid device.
+			 */
+			break;
+		}
+		minor = minor + 16;
+
+	} while (1);
+
+	return dev_count;
+}
+
+/*
+ * Module init: enumerate the SCSI disks, then assemble RAID0 and RAID1
+ * volumes until no further volume can be completed.  Returns 0 if at
+ * least one array was found, -ENODEV otherwise.
+ */
+static __init int
+iswraid_init(void)
+{
+	int retval, device, count = 0;
+	int devlist_count = 0;
+	int i, j;
+
+	printk(KERN_INFO "iswraid: Intel(tm) Software RAID driver %s \n",
+	       ICH_VERSION_STRING);
+
+	/* Initialize the raid structure to init values */
+	for (i = 0; i < MAX_RAID_VOLUMES; i++) {
+		raid[i].disks = 0;
+		raid[i].refcnt = 0;
+		for (j = 0; j < MAX_RAID_MEMBER_DISKS; j++) {
+			raid[i].disk[j].bdev = NULL;
+		}
+	}
+
+	devlist_count = detect_devlist();
+
+	if (devlist_count == 0) {
+		DEBUG("iswraid: No SCSI disks found \n");
+		printk(KERN_DEBUG "iswraid: No raid array found\n");
+		return -ENODEV;
+	}
+
+	DEBUG("iswraid: detecting RAID 0 volumes\n");
+	do {
+		RaidID = 0;
+		device = ataraid_get_device(&iswraid0_ops);
+		if (device < 0)
+			break;
+		retval = iswraid_init_one(device, 0);
+		if (retval) {
+			ataraid_release_device(device);
+			break;
+		} else {
+			count++;
+		}
+	} while (1);
+
+	DEBUG("iswraid: detecting RAID 1 volumes\n");
+	do {
+		RaidID = 0;
+		device = ataraid_get_device(&iswraid1_ops);
+		if (device < 0)
+			break;
+		retval = iswraid_init_one(device, 1);
+		if (retval) {
+			ataraid_release_device(device);
+			break;
+		} else {
+			count++;
+		}
+	} while (1);
+
+	if (count) {
+		printk(KERN_INFO "iswraid: Found %d RAID array(s) \n", count);
+		return 0;
+	}
+	printk(KERN_DEBUG "iswraid: No raid array found\n");
+	return -ENODEV;
+}
+
+/* Module exit: drop the member-disk references and release the volumes. */
+static void __exit
+iswraid_exit(void)
+{
+	int i, device;
+	for (device = 0; device < MAX_RAID_VOLUMES; device++) {
+		for (i = 0; i < MAX_RAID_MEMBER_DISKS; i++) {
+			struct block_device *bdev = raid[device].disk[i].bdev;
+			raid[device].disk[i].bdev = NULL;
+			if (bdev)
+				blkdev_put(bdev, BDEV_RAW);
+		}
+		raid[device].refcnt = 0;
+		if (raid[device].sectors)
+			ataraid_release_device(device);
+	}
+}
+
+/* Open hook: pin the module and count one more user of the volume. */
+static int
+iswraid_open(struct inode *inode, struct file *filp)
+{
+	unsigned int minor;
+
+	if (!inode || !inode->i_rdev)
+		return -EINVAL;
+
+	/* only pin the module once the open is known to succeed */
+	MOD_INC_USE_COUNT;
+	minor = MINOR(inode->i_rdev) >> SHIFT;
+	down(&iswraid_sem);
+	raid[minor].refcnt++;
+	up(&iswraid_sem);
+
+	return 0;
+}
+/* Release hook: undo the accounting done by iswraid_open(). */
+static int
+iswraid_release(struct inode *inode, struct file *filp)
+{
+	unsigned int minor;
+
+	MOD_DEC_USE_COUNT;
+	if (!inode || !inode->i_rdev)
+		return -EINVAL;
+
+	minor = MINOR(inode->i_rdev) >> SHIFT;
+	down(&iswraid_sem);
+	raid[minor].refcnt--;
+	up(&iswraid_sem);
+
+	return 0;
+}
+
+module_init(iswraid_init);
+module_exit(iswraid_exit);
+MODULE_LICENSE("GPL");
diff -uNr linux-2.4.22/drivers/ide/raid/iswraid.h linux-2.4.22-isw/drivers/ide/raid/iswraid.h
--- linux-2.4.22/drivers/ide/raid/iswraid.h	1969-12-31 17:00:00.000000000 -0700
+++ linux-2.4.22-isw/drivers/ide/raid/iswraid.h	2003-09-24 12:22:51.000000000 -0700
@@ -0,0 +1,98 @@
+/*
+ * iswraid.h Copyright (c) 2003,2004 Intel Corporation
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author(s): Boji Tony Kannanthanam
+ *            < boji dot t dot kannanthanam at intel dot com>
+ *
+ */
+
+//                        "12345678901234567890123456789012"
+#define MPB_SIGNATURE "Intel Raid ISM Cfg Sig. "
+#define MPB_VERSION_RAID0 "1.0.00"
+#define MPB_VERSION_RAID1 "1.1.00"
+#define MAX_SIGNATURE_LENGTH 32
+#define MAX_RAID_SERIAL_LEN 16
+#define ICH_DISK_BLOCK_SIZE 512
+
+struct MpbSerial {
+	char serial[MAX_RAID_SERIAL_LEN];
+};
+
+/* DISK CONFIGURATION INFO */
+struct _MpbDisk {
+	struct MpbSerial serial;	// ascii serial number
+	u32 totalBlocks;	// total blocks
+	u32 scsiId;		// scsi ID
+	u32 status;		// Same as Disk.status
+#define MPB_DISK_FILLERS (5)	// filler space reserved for
+	u32 filler[MPB_DISK_FILLERS];	// future expansion
+};
+
+/* RAID MAP CONFIGURATION INFO */
+struct _MpbRaidMap {
+
+	u32 pbaOfLba0;		// start address of partition
+	u32 blocksPerMember;	// blocks per member
+	u32 numDataStripes;	// number of data stripes
+	u16 blocksPerStrip;
+	u8 mapState;		// Normal, Uninitialized, Degraded, Failed
+	u8 raidLevel;		// 0, 1
+	u8 numMembers;		// number of member disks
+	u8 reserved[3];
+#define MPB_RAID_MAP_FILLERS (7)	// expansion area
+	u32 filler[MPB_RAID_MAP_FILLERS];	// expansion area
+	u32 diskOrdTbl[1];
+};
+
+/* RAID VOLUME INFO */
+
+struct _MpbRaidVol {
+	u32 reserved[2];
+	u8 migrState;		// Normal or Migrating
+	u8 migrType;		// Initializing, Rebuilding, ...
+	u8 dirty;
+	u8 fill[1];
+#define MPB_RAID_VOL_FILLERS (5)	// expansion area keeps the loMap
+	u32 filler[MPB_RAID_VOL_FILLERS];
+	struct _MpbRaidMap loMap;
+};
+
+/* RAID DEVICE CONFIGURATION INFO */
+struct _MpbRaidDev {
+	struct MpbSerial serial;	// serial number
+	u32 numDataBlocksLo;	// Data blocks on device (low 32 bits)
+	u32 numDataBlocksHi;	// Data blocks on device (high 32 bits)
+	u32 status;		// Persistent RaidDev status
+	u32 reservedBlocks;	// Reserved blocks at beginning of volume
+#define MPB_RAID_DEV_FILLERS (12)
+	u32 filler[MPB_RAID_DEV_FILLERS];
+	struct _MpbRaidVol raidVol;
+};
+
+struct RaidCfgSig {
+	char text[MAX_SIGNATURE_LENGTH];
+};
+
+struct _RaidMpb {
+	struct RaidCfgSig sig;
+	u32 checkSum;		// MPB Checksum
+	u32 mpbSize;		// Size of MPB
+	u32 familyNum;		// Checksum from first time this config was written
+	u32 generationNum;
+	u32 reserved[2];
+	u8 numDisks;		// Number of configured disks
+	u8 fill[3];
+#define RAID_MPB_FILLERS (39)
+	u32 filler[RAID_MPB_FILLERS];
+	struct _MpbDisk diskTbl[1];
+};
diff -uNr linux-2.4.22/drivers/ide/raid/Makefile linux-2.4.22-isw/drivers/ide/raid/Makefile
--- linux-2.4.22/drivers/ide/raid/Makefile	2003-06-13 07:51:34.000000000 -0700
+++ linux-2.4.22-isw/drivers/ide/raid/Makefile	2003-09-24 12:22:35.000000000 -0700
@@ -13,6 +13,7 @@
 obj-$(CONFIG_BLK_DEV_ATARAID_PDC)	+= pdcraid.o
 obj-$(CONFIG_BLK_DEV_ATARAID_HPT)	+= hptraid.o
 obj-$(CONFIG_BLK_DEV_ATARAID_SII)	+= silraid.o
+obj-$(CONFIG_BLK_DEV_ATARAID_ISW)	+= iswraid.o
 
 EXTRA_CFLAGS := -I../
 