[ENBD] 2.6.0 support
Peter T. Breuer
ptb at it.uc3m.es
Fri Jan 16 02:24:29 MST 2004
"Also sprach Tim ter Laak:"
> Incidentally, I thought I sent my (mostly working) 2.6.0 patch to the list
> yesterday. Perhaps they didn't get through because they were attached?
Mmm possibly. There are about 10-20 spams "for my attention" every day,
and I never attend to them! So I daresay yours is trapped with the
others that need attention in the administrative limbo that results from
my unwillingness to deal with dozens of posts over a web interface.
Sorry!
:-(
And many thanks for this post!
> Well, here they are inlined. It's one incremental kernel patch (so it goes
> over the test2 patch), and one small patch for the userspace utils. Apart
> from the kdev_t issue, there was an issue with the _IO{R|W} macros, and
> blk_init_queue now allocates memory for the queue itself.
>
> With the following patches, ENBD for 2.6.0 compiles, modules load and make
> test mostly works. Only some ioctl tests appear to fail :-( I don't know
> yet why this is.
It's possibly because I never ported the remote ioctl work! I'll look
and check.
> ---begin kernelpatch ---
>
> diff -Naur linux-2.6.0-orig/drivers/block/enbd/enbd_base.c linux-2.6.0-enbd/drivers/block/enbd/enbd_base.c
> --- linux-2.6.0-orig/drivers/block/enbd/enbd_base.c 2004-01-15 13:31:03.000000000 +0100
> +++ linux-2.6.0-enbd/drivers/block/enbd/enbd_base.c 2004-01-15 13:28:57.000000000 +0100
> @@ -530,7 +530,7 @@
> goto fail;
> }
>
> - minor = minor (inode->i_rdev);
> + minor = iminor (inode);
OK - that's interesting. I translated that as MINOR(inode->i_rdev)
last night. So "iminor" is a function for getting minors out of inodes!
> nbd = minor >> ENBD_SHIFT;
>
> // PTB sync_dev is async. fsync_dev is sync.
> @@ -678,7 +678,7 @@
> return -EINVAL;
> }
>
> - dev = minor (inode->i_rdev);
> + dev = iminor (inode);
Ditto.
> nbd = dev >> ENBD_SHIFT;
> part = dev - (nbd << ENBD_SHIFT);
> islot = part - 1;
> @@ -2219,7 +2219,7 @@
> struct enbd_md *md = &enbd_md;
>
> for (j = 0; j - 1 < lo->nslot; j++) {
> - kdev_t enbd_dev = mk_kdev (major, j + (lo->nbd << ENBD_SHIFT));
> + dev_t enbd_dev = MKDEV (major, j + (lo->nbd << ENBD_SHIFT));
Yes. I did that too.
> if (j == 0) {
> if (atomic_read (&lo->md_count) <= 0)
> continue;
> @@ -3258,12 +3258,12 @@
> ENBD_ERROR ("given bad inode.\n");
> return -EINVAL;
> }
> - if (major (inode->i_rdev) != major) {
> + if (imajor (inode) != major) {
Yes, I translated that as MAJOR(inode->i_rdev).
> ENBD_ERROR ("pseudo-major %d != %d\n",
> - major (inode->i_rdev), major);
> + imajor (inode), major);
ditto.
> return -ENODEV;
> }
> - minor = minor (inode->i_rdev);
> + minor = iminor (inode);
Similar.
> nbd = minor >> ENBD_SHIFT;
> if (nbd >= MAX_NBD) {
> ENBD_ERROR ("tried to open too many devices, %d\n", minor);
> @@ -3563,7 +3563,7 @@
> return 0;
>
> #ifndef BLKMDNTFY
> -#define BLKMDNTFY _IOW(0x12,133,sizeof(int))
> +#define BLKMDNTFY _IOW(0x12,133,int)
Correct. (blush). I had to do that too. They put an abuse detector
in the macro. Took me some time looking at it to understand where the
complaint about "[" in a case came from!
> #endif
> case BLKMDNTFY:
> ENBD_INFO ("received BLKMDNTFY, am now in raid %x\n",
> @@ -3583,7 +3583,7 @@
> return 0;
>
> #ifndef BLKMDUNTFY
> -#define BLKMDUNTFY _IOW(0x12,134,sizeof(int))
> +#define BLKMDUNTFY _IOW(0x12,134,int)
ditto.
> #endif
> case BLKMDUNTFY:
> ENBD_INFO ("received BLKMDUNTFY, now out of raid %x\n",
> @@ -3607,10 +3607,10 @@
> return 0;
>
> #ifndef BLKMDRGTR
> -#define BLKMDRGTR _IOW(0x12,135,sizeof(unsigned long))
> +#define BLKMDRGTR _IOW(0x12,135,unsigned long)
ditto. Funny - I don't remember that one. But yes, I did that too.
> #endif
> case BLKMDRGTR:
> - enbd_md.reg(&enbd_md, (int(*)(kdev_t, int))arg);
> + enbd_md.reg(&enbd_md, (int(*)(dev_t, int))arg);
Correct. I also changed all kdev_t's to dev_t's, which appears to be
__u32, eventually.
> return 0;
>
> } // PTB endsw
> @@ -3645,7 +3645,7 @@
> ENBD_ALERT ("null inode.\n");
> return -ENODEV;
> }
> - dev = minor (inode->i_rdev);
> + dev = iminor (inode);
OK. I did MINOR(inode->i_rdev).
> nbd = dev >> ENBD_SHIFT;
>
> if (nbd >= MAX_NBD) {
> @@ -3965,12 +3965,14 @@
> };
>
> static void
> -enbd_init_queue (struct enbd_device *lo, struct request_queue *queue)
> +enbd_init_queue (struct enbd_device *lo, struct request_queue **queue)
> {
>
> // PTB - set up kernel queue struct with default methods
> - blk_init_queue (queue, do_nbd_request, &enbd_lock);
> - blk_queue_max_sectors (queue, buf_sectors); /* max per request */
> + *queue = blk_init_queue (do_nbd_request, &enbd_lock);
> + if (*queue == NULL)
> + return;
> + blk_queue_max_sectors (*queue, buf_sectors); /* max per request */
Ah. Interesting. Yes. The blk_init_queue has to go up out of here and
into just before the point where this routine is called from, in
enbd_setup.
But why change to a double indirection? I don't think that's necessary.
One can pass the pointer to the queue into this routine as per normal,
and then all the rest of these changes are not required. I simply have:
static void
enbd_init_queue (struct enbd_device *lo, struct request_queue *queue)
{
// PTB - set up kernel queue struct with default methods
blk_queue_max_sectors (queue, buf_sectors); /* max per request */
and the rest of the routine is unaltered.
>
> /*
> * PTB - I think that put:
> @@ -3995,9 +3997,9 @@
> */
>
> // PTB control merge attempts so we do not overflow our buffer
> - lo->ll_merge_requests_fn = queue->merge_requests_fn;
> - lo->ll_front_merge_fn = queue->front_merge_fn;
> - lo->ll_back_merge_fn = queue->back_merge_fn;
> + lo->ll_merge_requests_fn = (*queue)->merge_requests_fn;
> + lo->ll_front_merge_fn = (*queue)->front_merge_fn;
> + lo->ll_back_merge_fn = (*queue)->back_merge_fn;
>
> // JSA - Add this line because under >=2.4.1, merge optimizations are in flux
> /*
> @@ -4013,10 +4015,10 @@
> * PTB - The functions below just impose our own stricter size limit before
> * - calling the defaults if all seems OK sizewise.
> */
> - queue->merge_requests_fn = &enbd_merge_requests_fn;
> - queue->front_merge_fn = &enbd_front_merge_fn;
> - queue->back_merge_fn = &enbd_back_merge_fn;
> -
> + (*queue)->merge_requests_fn = &enbd_merge_requests_fn;
> + (*queue)->front_merge_fn = &enbd_front_merge_fn;
> + (*queue)->back_merge_fn = &enbd_back_merge_fn;
> + return;
OK - so I didn't change any of those things.
> }
>
> int __init
> @@ -4041,11 +4043,17 @@
> * struct. These structs are big so we dynamically allocate
> * them.
> */
> + /* Tim responds ...
> + * Apparently this was changed later on in 2.5 (even 2.6.0-test>2?)
> + * so blk_init_queue now allocates the space.
> + * So we now first do enbd_init_queue, then check if it really
> + * was allocated.
> + */
Most of the changes I saw as I went through the kernel patches (I
downloaded them all last night!) were in test5.
> struct gendisk *disk = alloc_disk(ENBD_MAXCONN);
> - memset (lo, 0, sizeof (*lo));
> + memset (lo, 0, sizeof (*lo));
(:- thanks)
> if (disk) {
> lo->disk = disk;
> - disk->queue = kmalloc(sizeof(*disk->queue), GFP_KERNEL);
> + enbd_init_queue(lo, &(disk->queue));
Hmm. I'm not sure that's correct, but it's the right idea. What I did
was:
...
lo->disk = disk;
- disk->queue = kmalloc(sizeof(*disk->queue), GFP_KERNEL);
+ disk->queue = blk_init_queue(do_nbd_request, &enbd_lock);
if (!disk->queue) {
put_disk(disk);
...
and left the enbd_init_queue in its usual place lower down after the
following check.
> if (!disk->queue) {
> put_disk(disk);
> while (--i >= 0) {
> @@ -4059,8 +4067,6 @@
> }
> return -ENOMEM;
> }
> - memset(disk->queue, 0, sizeof(*disk->queue));
> - enbd_init_queue(lo, disk->queue);
Yes. The memset goes. But I left the enbd_init_queue here, as it does
what it is supposed to do.
put_disk(disk);
}
}
return -ENOMEM;
}
- memset(disk->queue, 0, sizeof(*disk->queue));
enbd_init_queue(lo, disk->queue);
}
}
> }
> }
>
> @@ -4162,8 +4168,8 @@
> struct request_queue *queue = disk->queue;
> if (queue) {
> blk_cleanup_queue (queue);
> - kfree (queue);
> - disk->queue = NULL;
> + //kfree (queue);
> + //disk->queue = NULL;
Arrrgh! Well spotted! I forgot that. Yes, we probably can't kfree
since we didn't kmalloc. Does blk_cleanup_queue now internally handle
the cleanup? Good. Yes -
* Description:
* blk_cleanup_queue is the pair to blk_init_queue() or
* blk_queue_make_request(). It should be called when a request
* queue is
* being released; typically when a block device is being
* de-registered.
* Currently, its primary task it to free all the &struct request
* structures that were allocated to the queue and the queue
* itself.
and I see inside it
kfree(q);
You are correct.
> }
> del_gendisk (disk);
> put_disk (disk);
> diff -Naur linux-2.6.0-orig/drivers/block/enbd/enbd_md.c linux-2.6.0-enbd/drivers/block/enbd/enbd_md.c
> --- linux-2.6.0-orig/drivers/block/enbd/enbd_md.c 2004-01-15 13:31:03.000000000 +0100
> +++ linux-2.6.0-enbd/drivers/block/enbd/enbd_md.c 2004-01-15 11:47:38.000000000 +0100
> @@ -44,7 +44,7 @@
> return res;
> }
> static int
> -enbd_md_reg (struct enbd_md *md, int(*fn)(kdev_t, int)) {
> +enbd_md_reg (struct enbd_md *md, int(*fn)(dev_t, int)) {
Correct. kdev_t => dev_t.
> spin_lock(&md->access_lock);
> if (!md->notify_fn) {
> md->notify_fn = fn;
> @@ -60,7 +60,7 @@
> * @lo the nbd device to tell them about
> */
> static int
> -enbd_notify_md_device (struct enbd_md *md, kdev_t enbd_dev, int cmd)
> +enbd_notify_md_device (struct enbd_md *md, dev_t enbd_dev, int cmd)
Ditto.
> {
> //kdev_t enbd_dev = mk_kdev (major, nbd << ENBD_SHIFT);
> int err;
> @@ -71,7 +71,7 @@
> md->notify_pid = current->pid;
> spin_unlock (&md->access_lock);
> ENBD_ALERT ("notifying %d for %x:%x to raid devices via fn\n",
> - cmd, major(enbd_dev), minor(enbd_dev));
> + cmd, MAJOR(enbd_dev), MINOR(enbd_dev));
Yes, now you are using the MAJOR and MINOR macros too. I'll have to check
what imajor and iminor do.
> err = md->notify_fn (enbd_dev, cmd);
> spin_lock (&md->access_lock);
> md->doing_notify = 0;
> @@ -101,4 +101,3 @@
> md->inc = enbd_md_inc;
> md->reg = enbd_md_reg;
> }
> -
> diff -Naur linux-2.6.0-orig/include/linux/enbd.h linux-2.6.0-enbd/include/linux/enbd.h
> --- linux-2.6.0-orig/include/linux/enbd.h 2004-01-15 13:31:03.000000000 +0100
> +++ linux-2.6.0-enbd/include/linux/enbd.h 2004-01-15 11:47:38.000000000 +0100
> @@ -162,11 +162,11 @@
> int doing_notify;
> int notify_pid;
> spinlock_t access_lock;
> - int (*notify_fn)(kdev_t, int);
> - int (*notify)(struct enbd_md *,kdev_t, int cmd);
> + int (*notify_fn)(dev_t, int);
> + int (*notify)(struct enbd_md *,dev_t, int cmd);
> int (*dec)(struct enbd_md *);
> int (*inc)(struct enbd_md *);
> - int (*reg)(struct enbd_md *, int(*)(kdev_t, int));
> + int (*reg)(struct enbd_md *, int(*)(dev_t, int));
Yes. All correct.
> };
>
> struct enbd_speed {
> @@ -181,7 +181,7 @@
>
> struct enbd_md_list {
> struct list_head list;
> - kdev_t dev;
> + dev_t dev;
Correct.
> };
>
> struct enbd_seqno; // forward decl
> @@ -525,5 +525,3 @@
> #endif /* MAJOR_NR */
>
> #endif /* LINUX_ENBD_H */
> -
> -
>
> -- end kernelpatch ---
>
> -- begin userspace patch ---
>
> diff -Naur nbd-2.4.32-orig/nbd/enbd-client.c nbd-2.4.32/nbd/enbd-client.c
> --- nbd-2.4.32-orig/nbd/enbd-client.c 2004-01-14 21:25:56.000000000 +0100
> +++ nbd-2.4.32/nbd/enbd-client.c 2004-01-14 21:27:32.000000000 +0100
> @@ -64,8 +64,8 @@
> #define BLKRRPART _IO(0x12,95)
> #define BLKGETSIZE _IO(0x12,96)
> #define BLKFLSBUF _IO(0x12,97)
> -#define BLKBSZGET _IOR(0x12,112,sizeof(int))
> -#define BLKBSZSET _IOW(0x12,113,sizeof(int))
:-(. I haven't looked at userspace yet! I'll believe you.
> +#define BLKBSZGET _IOR(0x12,112,int)
> +#define BLKBSZSET _IOW(0x12,113,int)
> #define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */
> #define BLKROGET _IO(0x12,94) /* get read-only status (0 = read_write) */
>
> diff -Naur nbd-2.4.32-orig/nbd/file.c nbd-2.4.32/nbd/file.c
> --- nbd-2.4.32-orig/nbd/file.c 2004-01-14 21:25:56.000000000 +0100
> +++ nbd-2.4.32/nbd/file.c 2004-01-14 21:27:32.000000000 +0100
> @@ -30,7 +30,7 @@
> #endif
> #ifndef BLKGETSIZE64
> /* lifted from fs.h */
> -#define BLKGETSIZE64 _IOR(0x12,114,sizeof(u64))
> +#define BLKGETSIZE64 _IOR(0x12,114,u64)
> #endif
>
> # include "cliserv.h"
> @@ -1454,4 +1454,3 @@
>
> return 0;
> }
> -
>
> -- end userspace patch ---
Yes, it is correct.
Thank you very much. It only remains to determine the difference
between MAJOR(inode->i_rdev) and imajor(inode).
OK .. there is none. It's defined in fs.h:
static inline unsigned imajor(struct inode *inode)
{
return MAJOR(inode->i_rdev);
}
I don't know what is better :-). What was all that kerfuffle about
kdev_t and dev_t for?
Peter
More information about the ENBD
mailing list