├── Makefile ├── README.md ├── run.sh ├── test_brw.c ├── ub.c ├── ub.h └── ubd.c /Makefile:
--------------------------------------------------------------------------------
1 | ifneq ($(KERNELRELEASE),)
2 | obj-m := ub.o
3 | else
4 | 
5 | VERSION = $(shell uname -r)
6 | INSTALL_DIR = /usr/sbin
7 | KERNEL_DIR := /lib/modules/$(shell uname -r)/build
8 | MODULE_DIR := /lib/modules/$(shell uname -r)/
9 | PWD := $(shell pwd)
10 | 
11 | CFLAGS = -Wall -O2 -D_LARGEFILE_SOURCE -D_GNU_SOURCE
12 | 
13 | all: ub.ko ubd test_brw
14 | 
15 | ub.ko: ub.c ub.h
16 | $(MAKE) -C $(KERNEL_DIR) M=$(PWD)
17 | 
18 | ubd: ubd.c ub.h
19 | cc $(CFLAGS) $< -o $@
20 | 
21 | test_brw: test_brw.c
22 | cc $(CFLAGS) $< -o $@
23 | 
24 | clean:
25 | $(MAKE) -C $(KERNEL_DIR) M=$(PWD) clean
26 | rm -f ubd test_brw TAGS *.symvers *~
27 | 
28 | install: all
29 | # XXX This is the new official way, but I haven't tested it.
30 | # $(MAKE) -C $(KERNEL_DIR) M=$(PWD) modules_install
31 | cp ub.ko $(MODULE_DIR)
32 | cp ubd $(INSTALL_DIR)
33 | 
34 | TAGS:
35 | etags *.c *.h
36 | 
37 | endif
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Userspace block device driver
2 | 
3 | This is a simple lightweight implementation of a kernel to userspace
4 | block device driver interface.
5 | 
6 | Linux Kernel 2.6 has made it much more reliable to handle block device
7 | requests in userspace due to the separation of writeback into the 'pdflush'
8 | thread lessening the likelihood of deadlocks (reentrancy in the userspace
9 | daemon).
10 | 
11 | I've had very good results with this code under quite heavy memory
12 | pressure (you still need a carefully written userspace which mlocks itself
13 | into memory and avoids doing certain things).
14 | 
15 | I wrote it because the nbd and enbd implementations didn't provide a
16 | nice and/or simple interface for a local userspace daemon. 
17 | 
18 | enbd was closer to what I needed but when I looked at it I thought its
19 | userspace blocking on an ioctl was an ugly design - plus it was overcomplicated
20 | with enbd specific features.
21 | 
22 | I chose to use a kernel <-> user comms model based on Alan Cox's psdev
23 | with a char device using read and write and a mmap area for the block
24 | request data (potentially allowing implementation of zero copy in the
25 | future by mapping the bio into the user address space).
26 | 
27 | It is named 'ub' as it was written way before the USB driver although as
28 | I hadn't published my work no one was aware of this. I should come up
29 | with a new name. Perhaps 'bu'?
30 | 
31 | This code is from circa ~2002 so don't expect it to compile
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | if [ -z "$1" ]
4 | then
5 | echo "Usage: $0 [options]"
6 | exit 1
7 | fi
8 | 
9 | IMAGE=$1
10 | shift
11 | 
12 | egrep '^ub' /proc/modules >/dev/null 2>&1
13 | if expr $? 
== 0 > /dev/null ; then 14 | rmmod ub 15 | fi 16 | insmod ./ub.ko || (echo "Failed to load ub.ko"; exit 1) 17 | 18 | UBC_DEV=/dev/ubc0 19 | UBB_DEV=/dev/ub0 20 | 21 | ./ubd $* $UBC_DEV $UBB_DEV $IMAGE 22 | -------------------------------------------------------------------------------- /test_brw.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | 12 | char *opt_ubb_device; 13 | char *opt_image; 14 | static int opt_debug = 0; 15 | static int opt_bufsize = 4096; 16 | static int opt_bufalign = 4096; 17 | static int opt_testoffset = 0; 18 | static char opt_testchar = '!'; 19 | 20 | 21 | static struct option long_options[] = { 22 | {"bufsize", 1, 0, 'b'}, 23 | {"bufalign", 1, 0, 'a'}, 24 | {"testchar", 1, 0, 'c'}, 25 | {"testoffset", 1, 0, 'o'}, 26 | {"debug", 0, 0, 'd'}, 27 | {"help", 0, 0, 'h'}, 28 | {0, 0, 0, 0} 29 | }; 30 | 31 | 32 | static void print_usage(char *prog) 33 | { 34 | fprintf(stderr, "Usage: %s [options] \n\n" 35 | "-h|--help show usage info\n" 36 | "-d|--debug switch on debug output\n" 37 | "-b|--bufsize read/write buffer size\n" 38 | "-a|--bufalign read/write buffer alignment\n" 39 | "-o|--testoffset read/write test offset\n" 40 | "-c|--testchar read/write test character\n", prog); 41 | } 42 | 43 | 44 | static void parse_cmdline(int argc, char **argv) 45 | { 46 | int c, option_index = 0; 47 | 48 | while((c = getopt_long(argc, argv, "hdb:a:o:c:", 49 | long_options, &option_index)) != -1) switch(c) { 50 | 51 | case 'b': 52 | if(sscanf(optarg, "%d", &opt_bufsize) != 1 || opt_bufsize <= 0) { 53 | fprintf(stderr, "bufsize must be a positive integer\n"); 54 | exit(1); 55 | } 56 | break; 57 | case 'a': 58 | if(sscanf(optarg, "%d", &opt_bufalign) != 1 || opt_bufalign <= 0) { 59 | fprintf(stderr, "bufalign must be a positive integer\n"); 60 | exit(1); 61 | } 62 | break; 63 | case 'o': 64 | 
if(sscanf(optarg, "%d", &opt_testoffset) != 1 || opt_testoffset < 0) { 65 | fprintf(stderr, "testoffset must be a positive integer\n"); 66 | exit(1); 67 | } 68 | break; 69 | case 'c': 70 | if(strlen(optarg) != 1) { 71 | fprintf(stderr, "testchar must be a single character\n"); 72 | exit(1); 73 | } 74 | opt_testchar = optarg[0]; 75 | break; 76 | case 'd': 77 | opt_debug++; 78 | break; 79 | case 'h': 80 | case '?': 81 | print_usage(argv[0]); 82 | exit(1); 83 | } 84 | 85 | if (argc - optind != 2) { 86 | print_usage(argv[0]); 87 | exit(1); 88 | } 89 | 90 | opt_ubb_device = argv[optind]; 91 | opt_image = argv[optind+1]; 92 | 93 | } 94 | 95 | int main(int argc, char **argv) 96 | { 97 | int ub_fd, bi_fd, ret; 98 | char *wrbuf, *rdbuf; 99 | 100 | parse_cmdline(argc, argv); 101 | 102 | /* open the ubb device */ 103 | if((ub_fd = open64(opt_ubb_device, O_RDWR | O_CREAT | O_LARGEFILE | O_DIRECT, 104 | 0644)) < 0) { 105 | perror("test_brw: opening ubb"); 106 | exit(1); 107 | } 108 | 109 | /* open the disk image */ 110 | if((bi_fd = open64(opt_image, O_RDWR | O_CREAT | O_LARGEFILE | O_DIRECT, 111 | 0644)) < 0) { 112 | perror("test_brw: opening image"); 113 | exit(1); 114 | } 115 | 116 | /* allocate buffers */ 117 | wrbuf = memalign(opt_bufalign, opt_bufsize); 118 | rdbuf = memalign(opt_bufalign, opt_bufsize); 119 | if(!wrbuf || !rdbuf) { 120 | perror("test_brw: error allocating buffers"); 121 | exit(1); 122 | } 123 | 124 | /* fill test buffer */ 125 | memset(wrbuf, opt_testchar, opt_bufsize); 126 | 127 | /* write to block image */ 128 | if((ret = pwrite64(ub_fd, wrbuf, opt_bufsize, opt_testoffset)) < 0) { 129 | perror("test_brw: error writing to ubb"); 130 | exit(1); 131 | } 132 | if(ret != opt_bufsize) { 133 | fprintf(stderr, "test_brw: short write: ret=%d", ret); 134 | exit(1); 135 | } 136 | 137 | /* read back from device */ 138 | if((ret = pread64(bi_fd, rdbuf, opt_bufsize, opt_testoffset)) < 0) { 139 | perror("test_brw: error reading from image"); 140 | exit(1); 141 | } 142 
| if(ret != opt_bufsize) { 143 | fprintf(stderr, "test_brw: short read: ret=%d", ret); 144 | exit(1); 145 | } 146 | 147 | /* compare read and write buffers */ 148 | if(memcmp(wrbuf, rdbuf, opt_bufsize) != 0) { 149 | fprintf(stderr, "test_brw: read and write buffers differ\n"); 150 | } else { 151 | printf("SUCCESS\n"); 152 | } 153 | 154 | /* clean up */ 155 | close(bi_fd); 156 | close(ub_fd); 157 | free(wrbuf); 158 | free(rdbuf); 159 | 160 | exit(0); 161 | } 162 | -------------------------------------------------------------------------------- /ub.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ub.c - userspace block driver interface 3 | * 4 | * Author: Michael Clark 5 | * 6 | * Based in part on psdev. 7 | * 8 | * An implementation of a loadable kernel mode driver providing 9 | * multiple kernel/user space bidirectional communications links. 10 | * 11 | * This program is free software; you can redistribute it and/or 12 | * modify it under the terms of the GNU General Public License 13 | * version 2 as published by the Free Software Foundation. 14 | * 15 | * TODO 16 | * 17 | * o verify creds 18 | * o mulithreaded requests (mmap buffer management) 19 | * 20 | */ 21 | 22 | /* #define UB_DEBUG 1 */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | 45 | #include "ub.h" 46 | 47 | /* module params */ 48 | static int ub_devs = UB_DEVS; 49 | static int ub_hard = UB_HARD; 50 | static int ub_bufs = UB_BUFS; 51 | static int ubc_major = UBC_MAJOR; 52 | static int ubb_major = UBB_MAJOR; 53 | static int ubb_maxsect = UBB_MAXSECTS; 54 | static int ub_reqs = 1024; /* hmmm... 
*/ 55 | 56 | MODULE_LICENSE("GPL"); 57 | MODULE_DESCRIPTION("user space block driver interface"); 58 | MODULE_AUTHOR("Michael Clark "); 59 | 60 | module_param(ub_devs, int, 0); 61 | module_param(ub_hard, int, 0); 62 | module_param(ub_bufs, int, 0); 63 | module_param(ubc_major, int, 0); 64 | module_param(ubb_major, int, 0); 65 | module_param(ubb_maxsect, int, 0); 66 | 67 | MODULE_PARM_DESC(ub_devs, "number of devices to initialise (4)"); 68 | MODULE_PARM_DESC(ub_hard, "1=block or 0=error when userspace dies (1)"); 69 | MODULE_PARM_DESC(ub_bufs, "number of pages for request buffer (32)"); 70 | MODULE_PARM_DESC(ubc_major, "char device major (0=dynamic)"); 71 | MODULE_PARM_DESC(ubb_major, "block device major (0=dynamic)"); 72 | MODULE_PARM_DESC(ubb_maxsect, "maximum number of sectors per request (64)"); 73 | 74 | 75 | static ub_dev *ub_devices = NULL; 76 | static struct class *ubc_class; 77 | 78 | 79 | /* ubc_free_req 80 | * 81 | * Called once userspace has completed a request. 82 | * Called with qlock held and moves request onto free list. 83 | */ 84 | static void ubc_free_req(ub_dev *dev, ub_req *req) 85 | { 86 | if (dev->free_reqs >= ub_reqs) { 87 | kfree(req); 88 | } else { 89 | list_add(&req->chain, dev->free.prev); 90 | dev->free_reqs++; 91 | } 92 | } 93 | 94 | 95 | /* ubc_new_req 96 | * 97 | * Called from ubb_transfer to get a new ub_req. 98 | * 99 | * Called with qlock held and grabs a request from the free list. 
100 | */ 101 | static ub_req* ubc_new_req(ub_dev *dev, struct request *breq) 102 | { 103 | ub_req *req = NULL; 104 | int cmd = rq_data_dir(breq); 105 | 106 | if (!list_empty(&dev->free)) { 107 | req = list_entry(dev->free.next, ub_req, chain); 108 | list_del(&req->chain); 109 | dev->free_reqs--; 110 | } 111 | 112 | if (!req) { 113 | spin_unlock_irq(&dev->qlock); 114 | req = kmalloc(sizeof(ub_req), GFP_NOIO); 115 | spin_lock_irq(&dev->qlock); 116 | if (!req) return NULL; 117 | } 118 | 119 | req->breq = breq; 120 | req->in.seq = ++dev->seq; 121 | req->in.cmd = cmd; 122 | req->in.offset = (off64_t)blk_rq_pos(breq) << 9; 123 | req->in.size = blk_rq_sectors(breq) << 9; 124 | req->in.mmap_offset = 0; 125 | 126 | return req; 127 | } 128 | 129 | 130 | static void ub_proc_offset(char *buf, char **start, off_t *offset, int *len) 131 | { 132 | if (*offset == 0) return; 133 | if (*offset >= *len) { 134 | *offset -= *len; 135 | *len = 0; 136 | } else { 137 | *start = buf + *offset; 138 | *offset = 0; 139 | } 140 | } 141 | 142 | 143 | static int ub_read_procmem(char *buf, char **start, off_t offset, 144 | int count, int *eof, void *data) 145 | { 146 | int i, len = 0; 147 | int limit = count - 80; /* Don't print more than this */ 148 | 149 | *start = buf; 150 | len += sprintf(buf+len, 151 | "#dev size blks cuse buse pend proc free bufs\n"); 152 | ub_proc_offset (buf, start, &offset, &len); 153 | for(i=0; isem)) return -ERESTARTSYS; 156 | 157 | len += sprintf(buf+len,"ub%i %i %i %i %i %i %i %i %i\n", 158 | i, dev->size, dev->blksize, 159 | dev->cdev_inuse, dev->bdev_inuse, 160 | dev->pending_reqs, dev->processing_reqs, 161 | dev->free_reqs, dev->num_bufs); 162 | ub_proc_offset (buf, start, &offset, &len); 163 | if (len > limit) goto out; 164 | 165 | out: 166 | up (&dev->sem); 167 | if (len > limit) 168 | break; 169 | } 170 | *eof = 1; 171 | return len; 172 | } 173 | 174 | 175 | static int ubc_open(struct inode *inode, struct file *file) 176 | { 177 | int dev_num = iminor(inode); 
178 | ub_dev *dev; /* device information */ 179 | 180 | /* check the device number */ 181 | if (dev_num >= ub_devs) return -ENODEV; 182 | dev = &ub_devices[dev_num]; 183 | 184 | /* make sure only one has me open */ 185 | if (down_interruptible (&dev->sem)) return -ERESTARTSYS; 186 | 187 | if (dev->cdev_inuse) { 188 | up (&dev->sem); 189 | return -EBUSY; 190 | } 191 | dev->cdev_inuse++; 192 | up (&dev->sem); 193 | 194 | /* and use file->private_data to point to the device data */ 195 | file->private_data = dev; 196 | 197 | return 0; 198 | } 199 | 200 | 201 | static int ubc_release(struct inode *inode, struct file *file) 202 | { 203 | ub_dev *dev = file->private_data; 204 | ub_req *req; 205 | struct list_head *pos, *q; 206 | 207 | if (down_interruptible (&dev->sem)) return -ERESTARTSYS; 208 | 209 | if (--dev->cdev_inuse) { 210 | up (&dev->sem); 211 | return 0; 212 | } 213 | up (&dev->sem); 214 | 215 | if (ub_hard) { 216 | 217 | /* move processing requests back onto pending queue */ 218 | PDEBUG("ubc_release: moving processing requests to pending\n"); 219 | spin_lock_irq(&dev->qlock); 220 | list_for_each_safe(pos, q, &dev->processing) { 221 | req = list_entry(pos, ub_req, chain); 222 | list_move(pos, dev->pending.prev); 223 | dev->processing_reqs--; 224 | dev->pending_reqs++; 225 | } 226 | spin_unlock_irq(&dev->qlock); 227 | 228 | } else { 229 | 230 | /* Error out pending and processing requests */ 231 | PDEBUG("ubc_release: error out pending request\n"); 232 | spin_lock_irq(&dev->qlock); 233 | list_for_each_safe(pos, q, &dev->processing) { 234 | req = list_entry(pos, ub_req, chain); 235 | list_del(pos); 236 | dev->processing_reqs--; 237 | __blk_end_request_all(req->breq, -EIO); 238 | ubc_free_req(dev, req); 239 | } 240 | list_for_each_safe(pos, q, &dev->pending) { 241 | req = list_entry(pos, ub_req, chain); 242 | list_del(pos); 243 | dev->pending_reqs--; 244 | __blk_end_request_all(req->breq, -EIO); 245 | ubc_free_req(dev, req); 246 | } 247 | 
spin_unlock_irq(&dev->qlock); 248 | 249 | } 250 | 251 | PDEBUG("ubc_release: done.\n"); 252 | 253 | return 0; 254 | } 255 | 256 | 257 | static ssize_t ubc_read(struct file *file, char *buf, 258 | size_t nbytes, loff_t *off) 259 | { 260 | DECLARE_WAITQUEUE(wait, current); 261 | ub_dev *dev = file->private_data; 262 | ub_req *req; 263 | ssize_t retval = 0, count = 0; 264 | 265 | if (nbytes == 0) return 0; 266 | 267 | spin_lock_irq(&dev->qlock); 268 | if (!list_empty(&dev->pending)) goto skip_wait; 269 | spin_unlock_irq(&dev->qlock); 270 | 271 | if (file->f_flags & O_NONBLOCK) return -EWOULDBLOCK; 272 | 273 | add_wait_queue(&dev->waitreq, &wait); 274 | set_current_state(TASK_INTERRUPTIBLE); 275 | while (list_empty(&dev->pending)) { 276 | if (file->f_flags & O_NONBLOCK) { 277 | retval = -EAGAIN; 278 | break; 279 | } 280 | if (signal_pending(current)) { 281 | retval = -ERESTARTSYS; 282 | break; 283 | } 284 | schedule(); 285 | } 286 | set_current_state(TASK_RUNNING); 287 | remove_wait_queue(&dev->waitreq, &wait); 288 | if (retval) goto out; 289 | 290 | spin_lock_irq(&dev->qlock); 291 | 292 | skip_wait: 293 | 294 | req = list_entry(dev->pending.next, ub_req, chain); 295 | list_move(&req->chain, dev->processing.prev); 296 | dev->pending_reqs--; 297 | dev->processing_reqs++; 298 | 299 | spin_unlock_irq(&dev->qlock); 300 | 301 | /* Move the input args into userspace */ 302 | count = sizeof(ub_in); 303 | if (nbytes < count) { 304 | printk ("ubc_read: userspace read %ld of %d in message\n", 305 | (long)nbytes, (int)sizeof(ub_in)); 306 | count = nbytes; 307 | } 308 | 309 | if (copy_to_user(buf, &req->in, count)) { 310 | count = 0; 311 | retval = -EFAULT; 312 | goto out; 313 | } 314 | 315 | if (req->in.cmd == UB_WRITE) { 316 | u_long offset = 0; 317 | struct bio_vec *bvec; 318 | struct req_iterator iter; 319 | rq_for_each_segment(bvec, req->breq, iter) { 320 | size_t size = bvec->bv_len; 321 | void *bvec_buf = kmap(bvec->bv_page) + bvec->bv_offset; 322 | int buf_num = offset >> 
PAGE_SHIFT; 323 | if (buf_num >= dev->num_bufs) BUG(); 324 | memcpy(dev->bufs[buf_num] + offset % PAGE_SIZE, 325 | bvec_buf, size); 326 | kunmap(bvec->bv_page); 327 | offset += size; 328 | } 329 | } 330 | 331 | out: 332 | return (count ? count : retval); 333 | 334 | } 335 | 336 | 337 | static ssize_t ubc_write(struct file *file, const char *buf, 338 | size_t nbytes, loff_t *off) 339 | { 340 | ub_dev *dev = file->private_data; 341 | ub_req *req = NULL; 342 | ub_req *tmp; 343 | ub_out hdr; 344 | ssize_t retval = 0, count = 0; 345 | struct list_head *pos, *q; 346 | 347 | /* Peek at the seq */ 348 | if (copy_from_user(&hdr, buf, 2 * sizeof(u_long))) 349 | return -EFAULT; 350 | 351 | PDEBUG("ubc_write: pid %d read request id %d result=%d\n", 352 | current->pid, hdr.seq, hdr.result); 353 | 354 | /* Look for the message on the processing queue. */ 355 | spin_lock_irq(&dev->qlock); 356 | list_for_each_safe(pos, q, &dev->processing) { 357 | tmp = list_entry(pos, ub_req, chain); 358 | if (tmp->in.seq == hdr.seq) { 359 | req = tmp; 360 | list_del(pos); 361 | dev->processing_reqs--; 362 | break; 363 | } 364 | } 365 | spin_unlock_irq(&dev->qlock); 366 | 367 | if (!req) { 368 | printk("ubc_write: request id %d not found\n", hdr.seq); 369 | return -ESRCH; 370 | } 371 | 372 | PDEBUG("found request id %d on queue!\n", hdr.seq); 373 | 374 | /* move data into response buffer. */ 375 | if (sizeof(ub_out) < nbytes) { 376 | printk("ubc_write: too much data: %d:%ld, request id %d\n", 377 | (int)sizeof(ub_out), (long)nbytes, hdr.seq); 378 | nbytes = sizeof(ub_out); /* don't have more space! 
*/ 379 | } 380 | if (copy_from_user(&req->out, buf, nbytes)) { 381 | /* error out request */ 382 | blk_end_request_all(req->breq, -EIO); 383 | retval = -EFAULT; 384 | goto out; 385 | } 386 | count = nbytes; 387 | 388 | if (req->in.cmd == UB_READ) { 389 | u_long offset = 0; 390 | struct bio_vec *bvec; 391 | struct req_iterator iter; 392 | rq_for_each_segment(bvec, req->breq, iter) { 393 | size_t size = bvec->bv_len; 394 | void *bvec_buf = kmap(bvec->bv_page) + bvec->bv_offset; 395 | int buf_num = offset >> PAGE_SHIFT; 396 | if (buf_num >= dev->num_bufs) BUG(); 397 | memcpy(bvec_buf, 398 | dev->bufs[buf_num] + offset % PAGE_SIZE, size); 399 | kunmap(bvec->bv_page); 400 | offset += size; 401 | } 402 | } 403 | 404 | if (req->in.cmd == UB_READ || req->in.cmd == UB_WRITE) { 405 | blk_end_request_all(req->breq, req->out.result ? 0 : -EIO); 406 | } 407 | 408 | out: 409 | spin_lock_irq(&dev->qlock); 410 | ubc_free_req(dev, req); 411 | spin_unlock_irq(&dev->qlock); 412 | 413 | return(count ? count : retval); 414 | } 415 | 416 | 417 | static unsigned int ubc_poll(struct file *file, poll_table * wait) 418 | { 419 | ub_dev *dev = file->private_data; 420 | unsigned int mask = POLLOUT | POLLWRNORM; 421 | 422 | poll_wait(file, &dev->waitreq, wait); 423 | 424 | if (!list_empty(&dev->pending)) 425 | mask |= POLLIN | POLLRDNORM; 426 | 427 | return mask; 428 | } 429 | 430 | 431 | static int ubc_ioctl(struct inode *inode, struct file *file, 432 | unsigned int cmd, u_long arg) 433 | { 434 | int err= 0, ret = 0; 435 | ub_dev *dev = (ub_dev*)file->private_data; 436 | 437 | if (_IOC_TYPE(cmd) != UBC_IOC_MAGIC) return -ENOTTY; 438 | if (_IOC_NR(cmd) > UBC_IOC_MAXNR) return -ENOTTY; 439 | if (_IOC_DIR(cmd) & _IOC_READ) 440 | err = !access_ok(VERIFY_WRITE, (void *)arg, _IOC_SIZE(cmd)); 441 | else if (_IOC_DIR(cmd) & _IOC_WRITE) 442 | err = !access_ok(VERIFY_READ, (void *)arg, _IOC_SIZE(cmd)); 443 | if (err) return -EFAULT; 444 | 445 | switch(cmd) { 446 | 447 | case UBC_IOCSSIZE: 448 | ret = 
__get_user(dev->size, (int*)arg); 449 | set_capacity(dev->gd, dev->size << 1); 450 | break; 451 | 452 | case UBC_IOCGSIZE: 453 | ret = __put_user(dev->size, (int*)arg); 454 | break; 455 | 456 | default: 457 | return -ENOTTY; 458 | } 459 | 460 | return ret; 461 | } 462 | 463 | 464 | static void ubc_vma_open(struct vm_area_struct *vma) 465 | { 466 | ub_dev *dev = (ub_dev *)vma->vm_private_data; 467 | dev->vmas++; 468 | } 469 | 470 | 471 | static void ubc_vma_close(struct vm_area_struct *vma) 472 | { 473 | ub_dev *dev = (ub_dev *)vma->vm_private_data; 474 | dev->vmas--; 475 | } 476 | 477 | 478 | static int ubc_vma_fault(struct vm_area_struct *vma, 479 | struct vm_fault *vmf) 480 | { 481 | ub_dev *dev = (ub_dev *)vma->vm_private_data; 482 | u_long offset = vmf->pgoff; 483 | struct page *page = NULL; 484 | void *pageptr = NULL; /* default to "missing" */ 485 | 486 | PDEBUG( "ubc_vma_fault: fault @ %08lx [vma %08lx-%08lx] offset=%ld\n", 487 | (u_long)vmf->virtual_address, 488 | vma->vm_start, vma->vm_end, offset); 489 | 490 | down(&dev->sem); 491 | 492 | if (offset >= dev->num_bufs) goto err; /* out of range */ 493 | 494 | if (dev && dev->bufs && dev->bufs[offset]) pageptr = dev->bufs[offset]; 495 | 496 | if (!pageptr) goto err; /* hole or end-of-file */ 497 | 498 | /* got it, now increment the count */ 499 | page = virt_to_page(pageptr); 500 | get_page(page); 501 | 502 | vmf->page = page; 503 | 504 | up(&dev->sem); 505 | return 0; 506 | 507 | err: 508 | up(&dev->sem); 509 | return VM_FAULT_ERROR; 510 | } 511 | 512 | 513 | static struct vm_operations_struct ubc_vm_ops = { 514 | .open = ubc_vma_open, 515 | .close = ubc_vma_close, 516 | .fault = ubc_vma_fault 517 | }; 518 | 519 | 520 | static int ubc_mmap(struct file *file, struct vm_area_struct *vma) 521 | { 522 | ub_dev *dev = (ub_dev*)file->private_data; 523 | 524 | if ((vma->vm_pgoff << PAGE_SHIFT) & (PAGE_SIZE-1)) 525 | return -ENXIO; /* need aligned offsets */ 526 | 527 | /* don't do anything here: "fault" will fill 
the holes */ 528 | vma->vm_ops = &ubc_vm_ops; 529 | vma->vm_flags |= VM_RESERVED; 530 | vma->vm_private_data = dev; 531 | ubc_vma_open(vma); 532 | dev->vma = vma; 533 | 534 | return 0; 535 | } 536 | 537 | 538 | static int ubb_open(struct block_device *bdev, fmode_t mode) 539 | { 540 | ub_dev *dev = bdev->bd_disk->private_data; 541 | 542 | down(&dev->sem); 543 | dev->bdev_inuse++; 544 | if (! dev->bdev) dev->bdev = bdev; 545 | /* bdget(inode->i_bdev) */ 546 | up(&dev->sem); 547 | return 0; 548 | } 549 | 550 | 551 | static int ubb_release(struct gendisk *gendisk, fmode_t mode) 552 | { 553 | ub_dev *dev = gendisk->private_data; 554 | 555 | down(&dev->sem); 556 | dev->bdev_inuse--; 557 | /* bdput(inode->i_bdev) */ 558 | up(&dev->sem); 559 | 560 | return 0; 561 | } 562 | 563 | 564 | int ubb_check_transfer_size(ub_dev *dev, int sector) 565 | { 566 | if (sector > (dev->size << 1)) { 567 | static int count = 0; 568 | if (count++ < 5) 569 | printk(KERN_WARNING 570 | "ub: request past end of device\n"); 571 | return -EINVAL; 572 | } 573 | return 0; 574 | } 575 | 576 | 577 | /* ubb_transfer 578 | * 579 | * Add a request to the userspace queue. 
Called from ubb_request 580 | * with qlock held 581 | */ 582 | static int ubb_transfer(ub_dev *dev, struct request *breq) 583 | { 584 | ub_req *req; 585 | 586 | if (ubb_check_transfer_size(dev, blk_rq_pos(breq) + 587 | blk_rq_sectors(breq))) 588 | return -EINVAL; 589 | 590 | /* Build a request */ 591 | req = ubc_new_req(dev, breq); 592 | 593 | PDEBUG("ubb_transfer: cmd=%d %lld:%d\n", 594 | req->in.cmd, req->in.offset, req->in.size); 595 | 596 | if (!req) { 597 | printk("Failed to allocate ub_req structure\n"); 598 | return -ENOMEM; 599 | } 600 | 601 | /* Append msg to pending queue */ 602 | list_add(&(req->chain), dev->pending.prev); 603 | dev->pending_reqs++; 604 | 605 | /* Wake up userspace if it is around */ 606 | spin_unlock_irq(&dev->qlock); 607 | if (dev->cdev_inuse && waitqueue_active(&dev->waitreq)) { 608 | wake_up(&dev->waitreq); /* poll race ??? */ 609 | } 610 | spin_lock_irq(&dev->qlock); 611 | 612 | return 0; 613 | } 614 | 615 | 616 | /* ubb_request 617 | * 618 | * This is the main block layer request function. It shifts requests 619 | * from the request queue to the userspace queue and then wakes up 620 | * userspace. If userspace is not around, it will either IO error the 621 | * requests of queue them to the userspace queue and skip the wakeup. 622 | * 623 | * Called from block layer with qlock held. 624 | */ 625 | static void ubb_request(struct request_queue *q) 626 | { 627 | ub_dev *dev = (ub_dev*)q->queuedata; 628 | struct request *req; 629 | int ret; 630 | 631 | if (dev->busy) return; 632 | dev->busy = 1; 633 | 634 | /* when not hard we do IO errors if userspace is not around */ 635 | if (!dev->cdev_inuse && !ub_hard) { 636 | /* Error out the requests, userspace is not there */ 637 | while ((req = blk_fetch_request(q)) != NULL) 638 | __blk_end_request_all(req, -EIO); 639 | } 640 | 641 | /* Move the requests to the userspace queue */ 642 | while ((req = blk_fetch_request(q)) != NULL) { 643 | if (! 
blk_fs_request(req)) { 644 | __blk_end_request_all(req, -EIO); 645 | continue; 646 | } 647 | /* Transfer one item onto the userspace queue */ 648 | ret = ubb_transfer(dev, req); 649 | PDEBUG("DIR=%d\n", rq_data_dir(req)); 650 | if (ret < 0) 651 | __blk_end_request_all(req, -EIO); 652 | } 653 | 654 | dev->busy = 0; 655 | } 656 | 657 | 658 | static struct file_operations ubc_fops = { 659 | .owner = THIS_MODULE, 660 | .read = ubc_read, 661 | .write = ubc_write, 662 | .ioctl = ubc_ioctl, 663 | .mmap = ubc_mmap, 664 | .open = ubc_open, 665 | .poll = ubc_poll, 666 | .release = ubc_release, 667 | }; 668 | 669 | 670 | static struct block_device_operations ubb_bdops = { 671 | .owner = THIS_MODULE, 672 | .open = ubb_open, 673 | .release = ubb_release, 674 | }; 675 | 676 | 677 | static int __init ub_init(void) 678 | { 679 | int i, j; 680 | int result = -ENOMEM; /* for the possible errors */ 681 | struct list_head *pos, *q; 682 | 683 | if ((result = register_blkdev(ubb_major, "ub")) < 0) { 684 | goto fail_reg_blk; 685 | } 686 | if (ubb_major == 0) ubb_major = result; /* dynamic */ 687 | 688 | if ((result = register_chrdev(ubc_major, "ubc", &ubc_fops)) < 0) { 689 | goto fail_reg_char; 690 | } 691 | if (ubc_major == 0) ubc_major = result; /* dynamic */ 692 | 693 | ubc_class = class_create(THIS_MODULE, "ubc"); 694 | if (IS_ERR(ubc_class)) { 695 | goto fail_reg_class; 696 | } 697 | 698 | ub_devices = kmalloc(ub_devs * sizeof (ub_dev), GFP_KERNEL); 699 | memset(ub_devices, 0, ub_devs * sizeof (ub_dev)); 700 | if (!ub_devices) { 701 | goto fail_devices; 702 | } 703 | 704 | for (i=0; i < ub_devs; i++) { 705 | ub_dev *dev = &ub_devices[i]; 706 | 707 | /* userspace queue initialisation */ 708 | init_waitqueue_head(&dev->waitreq); 709 | init_waitqueue_head(&dev->waitfree); 710 | dev->seq = 0; 711 | INIT_LIST_HEAD(&dev->pending); 712 | INIT_LIST_HEAD(&dev->processing); 713 | INIT_LIST_HEAD(&dev->free); 714 | for(j=0; j< ub_reqs; j++) { 715 | ub_req *req; 716 | req = 
kmalloc(sizeof(char*)*ub_bufs, GFP_KERNEL); 717 | if (req) { 718 | dev->free_reqs++; 719 | list_add(&req->chain, dev->free.prev); 720 | } 721 | } 722 | dev->bufs = kmalloc(sizeof(char*)*ub_bufs, GFP_KERNEL); 723 | if (!dev->bufs) goto fail_disks; 724 | memset(dev->bufs, 0, sizeof(char*)*ub_bufs); 725 | for(j=0; j < ub_bufs; j++) { 726 | dev->bufs[j] = 727 | (void *)__get_free_pages(GFP_KERNEL, 0); 728 | if (!dev->bufs[j]) goto fail_disks; 729 | } 730 | 731 | /* char dev initialisation */ 732 | sema_init (&dev->sem, 1); 733 | dev->num_bufs = ub_bufs; 734 | device_create(ubc_class, NULL, 735 | MKDEV(ubc_major, i), NULL, "ubc%d", i); 736 | 737 | /* block dev initialisation */ 738 | PDEBUG("ub_init: initialise queue on ub%d\n", i); 739 | spin_lock_init(&dev->qlock); 740 | dev->size = 0; 741 | dev->blksize = UBB_BLKSIZE; 742 | dev->maxsect = ubb_maxsect; 743 | dev->bdev = NULL; 744 | dev->queue = blk_init_queue(&ubb_request, &dev->qlock); 745 | if (!dev->queue) goto fail_disks; 746 | dev->queue->queuedata = dev; 747 | blk_queue_max_sectors(dev->queue, dev->maxsect); 748 | blk_queue_logical_block_size(dev->queue, UBB_HARDSECT); 749 | blk_queue_max_hw_segments(dev->queue, dev->maxsect); 750 | blk_queue_max_phys_segments(dev->queue, dev->maxsect); 751 | 752 | PDEBUG("ub_init: adding gendisk ub%d\n", i); 753 | if (!(dev->gd = alloc_disk(UBB_MINORS))) goto fail_disks; 754 | dev->gd->first_minor = i << UBB_MINOR_SHIFT; 755 | dev->gd->queue = dev->queue; 756 | dev->gd->major = ubb_major; 757 | dev->gd->fops = &ubb_bdops; 758 | dev->gd->private_data = dev; 759 | sprintf(dev->gd->disk_name, "ub%d", i); 760 | set_capacity(dev->gd, 0); 761 | 762 | add_disk(dev->gd); 763 | } 764 | 765 | create_proc_read_entry("ub", 0, NULL, ub_read_procmem, NULL); 766 | 767 | printk ("<1>ub: initialised %d devs\n", ub_devs); 768 | 769 | return 0; /* succeed */ 770 | 771 | fail_disks: 772 | for (i=0; i < ub_devs; i++) { 773 | ub_dev *dev = &ub_devices[i]; 774 | ub_req *req; 775 | 776 | 
list_for_each_safe(pos, q, &dev->free) { 777 | req = list_entry(pos, ub_req, chain); 778 | list_del(pos); 779 | kfree(req); 780 | } 781 | if (dev->queue) { 782 | PDEBUG("ub_init: cleanup queue ub%d\n", i); 783 | blk_cleanup_queue(dev->queue); 784 | } 785 | if (dev->gd) { 786 | PDEBUG("ub_init: removing gendisk ub%d\n", i); 787 | del_gendisk(dev->gd); 788 | put_disk(dev->gd); 789 | } 790 | if (dev->bufs) { 791 | for(j=0; j < dev->num_bufs; j++) 792 | if (dev->bufs[j]) 793 | free_pages((u_long)dev->bufs[j], 0); 794 | kfree(dev->bufs); 795 | } 796 | } 797 | kfree(ub_devices); 798 | fail_devices: 799 | class_destroy(ubc_class); 800 | fail_reg_class: 801 | unregister_chrdev(ubc_major, "ubc"); 802 | fail_reg_char: 803 | unregister_blkdev(ubb_major, "ub"); 804 | fail_reg_blk: 805 | 806 | return result; 807 | } 808 | 809 | 810 | static void __exit ub_cleanup(void) 811 | { 812 | int i; 813 | struct list_head *pos, *q; 814 | 815 | for (i=0; i < ub_devs; i++) { 816 | ub_dev *dev = &ub_devices[i]; 817 | ub_req *req; 818 | 819 | if (dev->gd) { 820 | PDEBUG("ub_init: removing gendisk ub%d\n", i); 821 | del_gendisk(dev->gd); 822 | put_disk(dev->gd); 823 | } 824 | if (dev->queue) { 825 | PDEBUG("ub_init: cleanup queue on ub%d\n", i); 826 | blk_cleanup_queue(dev->queue); 827 | } 828 | 829 | if (dev->bufs) { 830 | int j; 831 | for(j=0; j < dev->num_bufs; j++) 832 | if (dev->bufs[j]) 833 | free_pages((u_long)dev->bufs[j], 0); 834 | kfree(dev->bufs); 835 | } 836 | /* free all request lists */ 837 | list_for_each_safe(pos, q, &dev->processing) { 838 | req = list_entry(pos, ub_req, chain); 839 | list_del(pos); 840 | printk(KERN_WARNING "ub: freeing request " 841 | "in processing list - shouldn't happen"); 842 | kfree(req); 843 | } 844 | list_for_each_safe(pos, q, &dev->pending) { 845 | req = list_entry(pos, ub_req, chain); 846 | list_del(pos); 847 | printk(KERN_WARNING "ub: freeing request " 848 | "in pending list - shouldn't happen"); 849 | kfree(req); 850 | } 851 | 
list_for_each_safe(pos, q, &dev->free) { 852 | req = list_entry(pos, ub_req, chain); 853 | list_del(pos); 854 | kfree(req); 855 | } 856 | 857 | device_destroy(ubc_class, MKDEV(ubc_major, i)); 858 | } 859 | 860 | unregister_blkdev(ubb_major, "ub"); 861 | unregister_chrdev(ubc_major, "ubc"); 862 | 863 | class_destroy(ubc_class); 864 | 865 | remove_proc_entry("ub", 0); 866 | 867 | kfree(ub_devices); 868 | } 869 | 870 | 871 | module_init(ub_init); 872 | module_exit(ub_cleanup); 873 | -------------------------------------------------------------------------------- /ub.h: -------------------------------------------------------------------------------- 1 | #ifndef __UB_H__ 2 | #define __UB_H__ 3 | 4 | #define UB_DEVS 4 /* ubc0 through ubc3, ub0 through ub3 */ 5 | #define UB_HARD 1 /* 1 = block requests if userspace dies 6 | 0 = instead of error out requests */ 7 | #define UB_BUFS 32 /* mmap buffer pages */ 8 | #define UBC_MAJOR 0 /* dynamic major by default */ 9 | #define UBB_MAJOR 0 /* dynamic major by default */ 10 | #define UBB_MINORS 16 11 | #define UBB_MINOR_SHIFT 4 12 | #define UBB_BLKSIZE 512 /* 512 byte block size */ 13 | #define UBB_HARDSECT 512 /* 512 byte hard sectors */ 14 | #define UBB_MAXSECTS 64 /* max sectors per request */ 15 | 16 | #ifdef __KERNEL__ 17 | 18 | 19 | #ifdef UB_DEBUG 20 | #define PDEBUG(fmt, args...) printk( KERN_WARNING "ub: " fmt, ## args) 21 | #else 22 | #define PDEBUG(fmt, args...) 
/* not debugging: nothing */
#endif


/*
 * Per-device state, one instance per ub minor.
 * NOTE(review): comments reflect only what is visible in this header
 * and the cleanup code; confirm exact field semantics against ub.c.
 */
typedef struct ub_dev {
	/* chardev stuff */
	int cdev_inuse;			/* open count on the ubc char device */
	struct semaphore sem;
	void **bufs;			/* array of num_bufs single pages, mmap'ed to userspace */
	int num_bufs;
	struct vm_area_struct *vma;
	int vmas;

	/* bdev --> userspace request queue */
	u_long seq;			/* sequence number handed out per request */
	wait_queue_head_t waitreq;
	spinlock_t qlock;		/* protects the three lists below - TODO confirm in ub.c */
	struct list_head pending;	/* queued, not yet read by the daemon */
	struct list_head processing;	/* read by the daemon, awaiting its reply */
	struct list_head free;		/* preallocated ub_req pool */

	/* blockdev stuff */
	int bdev_inuse;
	int size;			/* size is in 1K units */
	int blksize;
	int maxsect;
	struct request_queue *queue;
	struct gendisk *gd;
	struct block_device *bdev;
	int busy;
	make_request_fn *real_make_request;
	wait_queue_head_t waitfree;

	/* statistics*/
	int free_reqs;
	int pending_reqs;
	int processing_reqs;

} ub_dev;

#endif

/* userspace communications */

#ifdef __KERNEL__
typedef long long off64_t;
#endif

/*
 * Wire format, kernel -> daemon: one ub_in per block request, read()
 * from the ubc char device.  The payload itself lives in the shared
 * mmap area at mmap_offset.
 */
typedef struct ub_in {
	int seq;		/* echoed back in ub_out.seq */
	int cmd;		/* UB_READ or UB_WRITE */
	off64_t offset;		/* byte offset into the backing image */
	size_t size;		/* transfer length in bytes */
	size_t mmap_offset;	/* unimplemented multithreaded interface */
} ub_in;

/* Wire format, daemon -> kernel: write()n back after servicing. */
typedef struct ub_out {
	int seq;		/* must match the ub_in.seq being answered */
	int result;		/* nonzero = success (full transfer) */
} ub_out;

/* One buffer for both directions; in/out alias each other, and both
 * start with the int seq, which therefore survives the overlay. */
typedef union {
	ub_in in;
	ub_out out;
} ubc_req;

#ifdef __KERNEL__

/* Kernel-side request tracking: list node + both wire halves + the
 * originating block-layer request. */
typedef struct ub_req {
	struct list_head chain;
	struct ub_in in;
	struct ub_out out;
	struct request* breq;
} ub_req;

#endif

/* cmds */
#define UB_READ 0
#define UB_WRITE 1


/* ioctls */
#define UBC_IOC_MAGIC 'u'
#define UBC_IOCRESET _IO(UBC_IOC_MAGIC, 0)
#define UBC_IOCSSIZE _IOW(UBC_IOC_MAGIC, 1, int)	/* set device size (1K units) */
#define UBC_IOCGSIZE _IOR(UBC_IOC_MAGIC, 2, int)	/* get device size (1K units) */
#define UBC_IOC_MAXNR 2

#endif
--------------------------------------------------------------------------------
/ubd.c:
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #include "ub.h" 19 | 20 | 21 | static volatile int running; 22 | 23 | static void sig_handler(int sig) 24 | { 25 | switch(sig) { 26 | case SIGTERM: 27 | case SIGINT: 28 | running = 0; 29 | fprintf(stderr,"caught signal, exiting after current request\n"); 30 | break; 31 | } 32 | } 33 | 34 | 35 | /* command line options */ 36 | char *opt_ubc_device; 37 | char *opt_ubb_device; 38 | char *opt_image; 39 | static int opt_nice = 1; 40 | static int opt_size = 0; 41 | static int opt_debug = 0; 42 | 43 | 44 | static struct option long_options[] = { 45 | {"size", 1, 0, 's'}, 46 | {"nice", 1, 0, 'n'}, 47 | {"debug", 0, 0, 'd'}, 48 | {"help", 0, 0, 'h'}, 49 | {0, 0, 0, 0} 50 | }; 51 | 52 | 53 | static void print_usage(char *prog) 54 | { 55 | fprintf(stderr, "Usage: %s [options] \n\n" 56 | "-h|--help show usage info\n" 57 | "-d|--debug switch on debug output\n" 58 | "-n|--nice nice level\n" 59 | "-s|--size size in kilobytes of the image\n", prog); 60 | } 61 | 62 | 63 | static void parse_cmdline(int argc, char **argv) 64 | { 65 | int c, option_index = 0; 66 | 67 | while ((c = getopt_long(argc, argv, "hdn:s:", 68 | long_options, &option_index)) != -1) switch(c) { 69 | 70 | case 's': 71 | if (sscanf(optarg, "%d", &opt_size) != 1 || opt_size <= 0) { 72 | fprintf(stderr, "size must be a positive integer\n"); 73 | exit(1); 74 | } 75 | break; 76 | case 'n': 77 | if (sscanf(optarg, "%d", &opt_nice) != 1) { 78 | fprintf(stderr, "nice level must be an integer\n"); 79 | exit(1); 80 | } 81 | break; 82 | case 'd': 83 | opt_debug++; 84 | break; 85 | case 'h': 86 | case '?': 87 | print_usage(argv[0]); 88 | exit(1); 89 | } 90 | 91 | if (argc - optind != 3) { 92 | print_usage(argv[0]); 93 | 
exit(1); 94 | } 95 | 96 | opt_ubc_device = argv[optind]; 97 | opt_ubb_device = argv[optind+1]; 98 | opt_image = argv[optind+2]; 99 | } 100 | 101 | 102 | int main(int argc, char **argv) 103 | { 104 | int ub_fd, bi_fd, ret; 105 | off_t b; 106 | char *buf; 107 | ubc_req req; 108 | struct pollfd pevent[1]; 109 | int rr_pid; 110 | 111 | parse_cmdline(argc, argv); 112 | 113 | /* only exit while between requests */ 114 | signal(SIGINT, sig_handler); 115 | signal(SIGTERM, sig_handler); 116 | 117 | /* open the ub char device */ 118 | if ((ub_fd = open(opt_ubc_device, O_RDWR)) < 0) { 119 | perror("ubd: opening ubc device"); 120 | exit(1); 121 | } 122 | 123 | /* open the disk image */ 124 | if ((bi_fd = open64(opt_image, O_RDWR | O_CREAT | O_LARGEFILE | O_DIRECT, 125 | 0644)) < 0) { 126 | perror("ubd: opening image"); 127 | exit(1); 128 | } 129 | 130 | /* find it's size if not specified */ 131 | if (!opt_size) { 132 | if (ioctl(bi_fd, BLKGETSIZE, &opt_size) == 0) { 133 | opt_size /= 2; /* assume 512 byte hardware sectors */ 134 | } else if ((b = lseek(bi_fd, 0, SEEK_END)) > 0) { 135 | opt_size = b / 1024; 136 | } else { 137 | fprintf(stderr, "ubd: can't detect size of block image\n"); 138 | exit(1); 139 | } 140 | } 141 | 142 | /* Set the size of the ub device */ 143 | if (ioctl(ub_fd, UBC_IOCSSIZE, &opt_size) < 0) { 144 | perror("ubb: ioctl(UBC_IOCSSIZE)"); 145 | exit(1); 146 | } 147 | 148 | /* map the read/write buffer of ub char device */ 149 | if ((buf = mmap(NULL, UB_BUFS * PAGE_SIZE, PROT_READ | PROT_WRITE, 150 | MAP_SHARED, ub_fd, 0)) == (void*)-1) { 151 | perror("ubd: mmap"); 152 | exit(1); 153 | } 154 | 155 | /* lock me into memory */ 156 | if (mlockall(MCL_CURRENT|MCL_FUTURE)<0) { 157 | perror("mlockall"); 158 | exit(1); 159 | } 160 | 161 | /* make sure I run with a high priority */ 162 | if (nice(opt_nice) < 0) { 163 | perror("ubd: nice"); 164 | exit(1); 165 | } 166 | 167 | printf("image : %s\n", opt_image); 168 | printf("size : %dK\n", opt_size); 169 | printf("vm : 
%p\n", buf); 170 | 171 | /* Get the kernel to reread the partition table */ 172 | if ((rr_pid = fork()) == 0) { 173 | int bd; 174 | if ((bd = open(opt_ubb_device, O_RDWR)) < 0) { 175 | printf("couldn't open block device\n"); 176 | exit(0); 177 | } 178 | if (ioctl(bd, BLKRRPART) < 0) { 179 | printf("error reading partition table\n"); 180 | } 181 | close(bd); 182 | exit(0); 183 | } 184 | 185 | /* okay, start polling the ub char device for requests */ 186 | pevent[0].fd = ub_fd; 187 | pevent[0].events = POLLIN; 188 | running = 1; 189 | while (1) { 190 | if (!running) { 191 | exit(0); 192 | } 193 | if (rr_pid) { 194 | /* we only end up reaping after getting an event */ 195 | int status; 196 | if (waitpid(rr_pid, &status, WNOHANG) == rr_pid) 197 | rr_pid = 0; 198 | } 199 | 200 | #if 1 201 | while ((ret = poll(pevent, 1, -1)) < 0 && errno == EINTR) { 202 | if (!running) 203 | break; 204 | if (opt_debug) 205 | printf("ubd: ubc poll was interuppted, retrying.\n"); 206 | } 207 | if (!running) break; 208 | if (ret < 0 && errno != EINTR) { 209 | perror("ubd: ubc poll"); 210 | continue; 211 | } else if (!(pevent[0].events & POLLIN)) continue; 212 | #endif 213 | 214 | /* Read the request from ubc */ 215 | while ((ret = read(ub_fd, &req, sizeof(req.in))) < 0 && errno == EINTR) { 216 | if (opt_debug) 217 | printf("ubd: ubc read was interuppted, retrying.\n"); 218 | } 219 | if (ret < 0 && errno != EINTR) { 220 | perror("ubd: ubc read"); 221 | continue; 222 | } else if (ret != sizeof(req.in)) { 223 | fprintf(stderr, "ubd: read request was to short\n"); 224 | continue; 225 | } 226 | 227 | switch(req.in.cmd) { 228 | 229 | 230 | case UB_READ: 231 | 232 | if (opt_debug) 233 | printf("request[%d]: READ offset=%" PRId64 " size=%zd\n", 234 | req.in.seq, req.in.offset, req.in.size); 235 | b = 0; 236 | while (b != req.in.size) { 237 | while ((ret = pread64(bi_fd, 238 | buf + req.in.mmap_offset + b, 239 | req.in.size - b, 240 | req.in.offset + b)) < 0 && errno == EINTR) { 241 | if 
(opt_debug) 242 | printf("ubd: img read was interuppted, retrying.\n"); 243 | } 244 | if (ret < 0 && errno != EINTR) { 245 | perror("ubd: image read"); 246 | break; 247 | } 248 | b += ret; 249 | } 250 | req.out.result = (b == req.in.size); 251 | break; 252 | 253 | 254 | case UB_WRITE: 255 | 256 | if (opt_debug) 257 | printf("request[%d]: WRITE offset=%" PRId64 " size=%zd\n", 258 | req.in.seq, req.in.offset, req.in.size); 259 | b = 0; 260 | while (b != req.in.size) { 261 | while ((ret = pwrite64(bi_fd, 262 | buf + req.in.mmap_offset + b, 263 | req.in.size - b, 264 | req.in.offset + b)) < 0 && errno == EINTR) { 265 | if (opt_debug) 266 | printf("ubd: img write was interuppted, retrying.\n"); 267 | } 268 | if (ret < 0 && errno != EINTR) { 269 | perror("ubd: image write"); 270 | break; 271 | } 272 | b += ret; 273 | } 274 | req.out.result = (b == req.in.size); 275 | break; 276 | 277 | default: 278 | printf("request[%d]: cmd=%d unknown\n", 279 | req.in.seq, req.in.cmd); 280 | break; 281 | 282 | } 283 | 284 | /* Write the response back to ubc */ 285 | while ((ret = write(ub_fd, &req, sizeof(req.out))) < 0 && errno == EINTR) { 286 | if (opt_debug) 287 | printf("ubd: ubc write was interuppted, retrying.\n"); 288 | } 289 | if (ret < 0 && errno != EINTR) { 290 | perror("ubd: ubc write"); 291 | continue; 292 | } else if (ret != sizeof(req.out)) { 293 | fprintf(stderr, "ubd: write response was to short\n"); 294 | continue; 295 | } 296 | 297 | } 298 | 299 | return 0; 300 | } 301 | --------------------------------------------------------------------------------