/* * cramit -- compress a file one page a time, allow random page access. * * Copyright (C) 2001 zhaoway * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as * published by the Free Software Foundation; either version 2 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /* Shamelessly stolen from Linus' mkcramfs.c */ #include #include #include #include #include #include #include #include #include #include #include typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; static const char *progname = "cramit"; static void usage(void) { fprintf(stderr, "Usage: %s infile outfile\n", progname); exit(1); } /* * If DO_HOLES is defined, then cramit can create explicit holes in the * data, which saves 26 bytes per hole (which is a lot smaller a * saving than most filesystems). */ /* #define DO_HOLES 1 */ #define PAGE_CACHE_SIZE (4096) /* Assumes PAGE_CACHE_SIZE as block size. */ static unsigned int blksize = PAGE_CACHE_SIZE; struct entry { unsigned int size; void *uncompressed; }; #ifdef DO_HOLES /* * Returns non-zero iff the first LEN bytes from BEGIN are all NULs. */ static int is_zero(char const *begin, unsigned len) { return (len-- == 0 || (begin[0] == '\0' && (len-- == 0 || (begin[1] == '\0' && (len-- == 0 || (begin[2] == '\0' && (len-- == 0 || (begin[3] == '\0' && memcmp(begin, begin + 4, len) == 0)))))))); } #else /* !DO_HOLES */ # define is_zero(_begin,_len) (0) /* Never create holes. */ #endif /* !DO_HOLES */ static unsigned int do_compress(char *base, char *uncompressed, unsigned int size) { unsigned long original_size = size; unsigned long new_size; unsigned long blocks = (size - 1) / blksize + 1; unsigned long curr = 4 * blocks; unsigned int offset = 0; int change; do { unsigned long len = 2 * blksize; unsigned int input = size; *(u32 *) (base + offset) = curr; offset += 4; if (input > blksize) input = blksize; size -= input; if (!is_zero (uncompressed, input)) { compress(base + curr, &len, uncompressed, input); curr += len; } uncompressed += input; if (len > blksize*2) { /* (I don't think this can happen with zlib.) */ printf("AIEEE: block \"compressed\" to > 2*blocklength (%ld)\n", len); exit(1); } *(u32 *) (base + offset) = curr; offset += 4; } while (size); curr = (curr + 3) & ~3; new_size = curr; /* TODO: Arguably, original_size in these 2 lines should be st_blocks * 512. But if you say that then perhaps administrative data should also be included in both. */ change = new_size - original_size; printf("%6.2f%% (%+d bytes)\n", (change * 100) / (double) original_size, change); return curr; } /* * Traverse the entry tree, writing data for every item that has * non-null entry->compressed (i.e. every symlink and non-empty * regfile). */ static unsigned int write_data(struct entry *entry, char *base) { return do_compress(base, entry->uncompressed, entry->size); } /* * Usage: * cramit infile outfile */ int main(int argc, char **argv) { struct stat st; struct entry *entry; char *outfile; unsigned int offset; ssize_t written; int infd, outfd; char const *dirname; if (argc) progname = argv[0]; if (argc != 3) usage(); if (stat(dirname = argv[1], &st) < 0) { perror(argv[1]); exit(1); } outfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0666); entry = calloc(1, sizeof(struct entry)); if (!entry) { perror(NULL); exit(5); } entry->size = st.st_size; infd = open(argv[1], O_RDONLY); if (infd < 0) { perror("open"); } entry->uncompressed = mmap(NULL, entry->size, PROT_READ, MAP_PRIVATE, infd, 0); if (-1 == (int) (long) entry->uncompressed) { perror("mmap"); exit(5); } /* TODO: Why do we use a private/anonymous mapping here followed by a write below, instead of just a shared mapping and a couple of ftruncate calls? Is it just to save us having to deal with removing the file afterwards? If we really need this huge anonymous mapping, we ought to mmap in smaller chunks, so that the user doesn't need nn MB of RAM free. If the reason is to be able to write to un-mmappable block devices, then we could try shared mmap and revert to anonymous mmap if the shared mmap fails. */ outfile = mmap(NULL, entry->size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (-1 == (int) (long) outfile) { perror("Compressed file map"); exit(1); } offset = write_data(entry, outfile); /* We always write a multiple of blksize bytes, so that losetup works. */ offset = ((offset - 1) | (blksize - 1)) + 1; printf("Everything: %d kilobytes\n", offset >> 10); written = write(outfd, outfile, offset); if (written < 0) { perror("Compressed file"); exit(1); } if (offset != written) { fprintf(stderr, "Compressed file write failed (%d %d)\n", written, offset); exit(1); } return 0; }