Index: sys/sys/param.h
===================================================================
RCS file: /cvsroot/src/sys/sys/param.h,v
retrieving revision 1.482
diff -u -r1.482 param.h
--- sys/sys/param.h	12 Jul 2015 08:11:28 -0000	1.482
+++ sys/sys/param.h	16 Aug 2015 17:34:02 -0000
@@ -63,7 +63,7 @@
  *	2.99.9		(299000900)
  */
 
-#define	__NetBSD_Version__	799002000	/* NetBSD 7.99.20 */
+#define	__NetBSD_Version__	799002100	/* NetBSD 7.99.21 */
 
 #define __NetBSD_Prereq__(M,m,p) (((((M) * 100000000) + \
     (m) * 1000000) + (p) * 100) <= __NetBSD_Version__)
Index: sys/sys/disk.h
===================================================================
RCS file: /cvsroot/src/sys/sys/disk.h,v
retrieving revision 1.64
diff -u -r1.64 disk.h
--- sys/sys/disk.h	2 May 2015 08:00:08 -0000	1.64
+++ sys/sys/disk.h	16 Aug 2015 17:34:02 -0000
@@ -476,10 +476,11 @@
 	void	(*d_minphys)(struct buf *);
 	int	(*d_open)(dev_t, int, int, struct lwp *);
 	int	(*d_close)(dev_t, int, int, struct lwp *);
-	void	(*d_diskstart)(device_t);
+	int	(*d_diskstart)(device_t, struct buf *);
 	void	(*d_iosize)(device_t, int *);
 	int	(*d_dumpblocks)(device_t, void *, daddr_t, int);
 	int	(*d_lastclose)(device_t);
+	int	(*d_discard)(device_t, off_t, off_t);
 };
 #endif
 
Index: sys/dev/dksubr.c
===================================================================
RCS file: /cvsroot/src/sys/dev/dksubr.c,v
retrieving revision 1.70
diff -u -r1.70 dksubr.c
--- sys/dev/dksubr.c	16 Aug 2015 17:28:28 -0000	1.70
+++ sys/dev/dksubr.c	16 Aug 2015 17:34:02 -0000
@@ -73,6 +73,7 @@
 	(MAKEDISKDEV(major((dev)), DISKUNIT((dev)), RAW_PART))
 
 static void	dk_makedisklabel(struct dk_softc *);
+static int	dk_translate(struct dk_softc *, struct buf *);
 
 void
 dk_init(struct dk_softc *dksc, device_t dev, int dtype)
@@ -89,6 +90,7 @@
 void
 dk_attach(struct dk_softc *dksc)
 {
+	mutex_init(&dksc->sc_iolock, MUTEX_DEFAULT, IPL_VM);
 	dksc->sc_flags |= DKF_INITED;
 #ifdef DIAGNOSTIC
 	dksc->sc_flags |= DKF_WARNLABEL | DKF_LABELSANITY;
@@ -99,6 +101,7 @@
 dk_detach(struct dk_softc *dksc)
 {
 	dksc->sc_flags &= ~DKF_INITED;
+	mutex_destroy(&dksc->sc_iolock);
 }
 
 /* ARGSUSED */
@@ -199,11 +202,10 @@
 	return 0;
 }
 
-void
-dk_strategy(struct dk_softc *dksc, struct buf *bp)
+static int
+dk_translate(struct dk_softc *dksc, struct buf *bp)
 {
-	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
-	int	s, part;
+	int	part;
 	int	wlabel;
 	daddr_t	blkno;
 	struct disklabel *lp;
@@ -211,16 +213,6 @@
 	uint64_t numsecs;
 	unsigned secsize;
 
-	DPRINTF_FOLLOW(("dk_strategy(%s, %p, %p)\n",
-	    dksc->sc_xname, dksc, bp));
-
-	if (!(dksc->sc_flags & DKF_INITED)) {
-		DPRINTF_FOLLOW(("dk_strategy: not inited\n"));
-		bp->b_error  = ENXIO;
-		biodone(bp);
-		return;
-	}
-
 	lp = dksc->sc_dkdev.dk_label;
 	dk = &dksc->sc_dkdev;
 
@@ -234,29 +226,20 @@
 	 * The transfer must be a whole number of blocks and the offset must
 	 * not be negative.
 	 */
-	if ((bp->b_bcount % secsize) != 0 || bp->b_blkno < 0) {
-		bp->b_error = EINVAL;
-		biodone(bp);
-		return;
-	}
+	if ((bp->b_bcount % secsize) != 0 || bp->b_blkno < 0)
+		return EINVAL;
 
 	/* If there is nothing to do, then we are done */
-	if (bp->b_bcount == 0) {
-		biodone(bp);
-		return;
-	}
+	if (bp->b_bcount == 0)
+		return 0;
 
 	wlabel = dksc->sc_flags & (DKF_WLABEL|DKF_LABELLING);
 	if (part == RAW_PART) {
-		if (bounds_check_with_mediasize(bp, DEV_BSIZE, numsecs) <= 0) {
-			biodone(bp);
-			return;
-		}
+		if (bounds_check_with_mediasize(bp, DEV_BSIZE, numsecs) <= 0)
+			return bp->b_error;
 	} else {
-		if (bounds_check_with_label(&dksc->sc_dkdev, bp, wlabel) <= 0) {
-			biodone(bp);
-			return;
-		}
+		if (bounds_check_with_label(&dksc->sc_dkdev, bp, wlabel) <= 0)
+			return bp->b_error;
 	}
 
 	/*
@@ -272,15 +255,72 @@
 		blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset;
 	bp->b_rawblkno = blkno;
 
+	return -1;
+}
+
+void
+dk_strategy(struct dk_softc *dksc, struct buf *bp)
+{
+	int error;
+
+	DPRINTF_FOLLOW(("dk_strategy(%s, %p, %p)\n",
+	    dksc->sc_xname, dksc, bp));
+
+	if (!(dksc->sc_flags & DKF_INITED)) {
+		DPRINTF_FOLLOW(("dk_strategy: not inited\n"));
+		error = ENXIO;
+	} else {
+		error = dk_translate(dksc, bp);
+	}
+	/* Non-negative: bp is finished; record the status and complete it. */
+	if (error >= 0) {
+		bp->b_error = error;
+		biodone(bp);
+		return;
+	}
+
 	/*
-	 * Start the unit by calling the start routine
-	 * provided by the individual driver.
+	 * Queue buffer and start unit
 	 */
-	s = splbio();
-	bufq_put(dksc->sc_bufq, bp);
-	dkd->d_diskstart(dksc->sc_dev);
-	splx(s);
-	return;
+	dk_start(dksc, bp);
+}
+
+/* Queue bp (may be NULL) and feed queued buffers to d_diskstart. */
+void
+dk_start(struct dk_softc *dksc, struct buf *bp)
+{
+	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
+	int error;
+	struct buf *qbp __diagused;
+
+	mutex_enter(&dksc->sc_iolock);
+
+	if (bp != NULL)
+		bufq_put(dksc->sc_bufq, bp);
+
+	while ((bp = bufq_peek(dksc->sc_bufq)) != NULL) {
+		disk_busy(&dksc->sc_dkdev);
+		error = dkd->d_diskstart(dksc->sc_dev, bp);
+		if (error == EAGAIN) {
+			/* bp stays queued for a later dk_start() call. */
+			disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ));
+			break;
+		}
+
+		qbp = bufq_get(dksc->sc_bufq);
+		KASSERT(bp == qbp);
+
+		if (error != 0) {
+			bp->b_error = error;
+			bp->b_resid = bp->b_bcount;
+			/* dk_done() takes sc_iolock itself; drop it first. */
+			mutex_exit(&dksc->sc_iolock);
+			dk_done(dksc, bp);
+			mutex_enter(&dksc->sc_iolock);
+		}
+	}
+
+	mutex_exit(&dksc->sc_iolock);
+}
 
 void
@@ -296,14 +336,54 @@
 		printf("\n");
 	}
 
+	mutex_enter(&dksc->sc_iolock);
 	disk_unbusy(dk, bp->b_bcount - bp->b_resid, (bp->b_flags & B_READ));
+	mutex_exit(&dksc->sc_iolock);
+
 #ifdef notyet
 	rnd_add_uint(&dksc->sc_rnd_source, bp->b_rawblkno);
 #endif
+
 	biodone(bp);
 }
 
 int
+dk_discard(struct dk_softc *dksc, dev_t dev, off_t pos, off_t len)
+{
+	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
+	unsigned secsize = dksc->sc_dkdev.dk_geom.dg_secsize;
+	/* zeroed so that dk_translate() cannot return a stale b_error */
+	struct buf tmp = { .b_error = 0 }, *bp = &tmp;
+	int error;
+
+	DPRINTF_FOLLOW(("dk_discard(%s, %p, 0x%"PRIx64", %jd, %jd)\n",
+	    dksc->sc_xname, dksc, (uint64_t)dev, (intmax_t)pos, (intmax_t)len));
+
+	if (!(dksc->sc_flags & DKF_INITED)) {
+		DPRINTF_FOLLOW(("dk_discard: not inited\n"));
+		return ENXIO;
+	}
+	/* d_discard is optional; a driver that omits it leaves it NULL */
+	if (dkd->d_discard == NULL)
+		return ENODEV;
+	if (secsize == 0 || (pos % secsize) != 0)
+		return EINVAL;
+
+	/* enough data to please the bounds checking code */
+	bp->b_dev = dev;
+	bp->b_blkno = (daddr_t)(pos / secsize);
+	bp->b_bcount = len;
+	bp->b_flags = B_WRITE;
+
+	/* -1 means "go ahead"; anything else is the final status */
+	error = dk_translate(dksc, bp);
+	if (error >= 0)
+		return error;
+	return dkd->d_discard(dksc->sc_dev,
+	    (off_t)bp->b_rawblkno * secsize, (off_t)bp->b_bcount);
+}
+
+int
 dk_size(struct dk_softc *dksc, dev_t dev)
 {
 	const struct dkdriver *dkd = dksc->sc_dkdev.dk_driver;
@@ -462,12 +542,11 @@
 	case DIOCGSTRATEGY:
 	    {
 		struct disk_strategy *dks = (void *)data;
-		int s;
 
-		s = splbio();
+		mutex_enter(&dksc->sc_iolock);
 		strlcpy(dks->dks_name, bufq_getstrategyname(dksc->sc_bufq),
 		    sizeof(dks->dks_name));
-		splx(s);
+		mutex_exit(&dksc->sc_iolock);
 		dks->dks_paramlen = 0;
 
 		return 0;
@@ -478,7 +557,6 @@
 		struct disk_strategy *dks = (void *)data;
 		struct bufq_state *new;
 		struct bufq_state *old;
-		int s;
 
 		if (dks->dks_param != NULL) {
 			return EINVAL;
@@ -489,11 +567,11 @@
 		if (error) {
 			return error;
 		}
-		s = splbio();
+		mutex_enter(&dksc->sc_iolock);
 		old = dksc->sc_bufq;
 		bufq_move(new, old);
 		dksc->sc_bufq = new;
-		splx(s);
+		mutex_exit(&dksc->sc_iolock);
 		bufq_free(old);
 
 		return 0;
Index: sys/dev/dkvar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/dkvar.h,v
retrieving revision 1.20
diff -u -r1.20 dkvar.h
--- sys/dev/dkvar.h	2 May 2015 08:00:08 -0000	1.20
+++ sys/dev/dkvar.h	16 Aug 2015 17:34:02 -0000
@@ -44,6 +44,7 @@
 #define DK_XNAME_SIZE 8
 	char			 sc_xname[DK_XNAME_SIZE]; /* external name */
 	struct disk		 sc_dkdev;	/* generic disk info */
+	kmutex_t		 sc_iolock;	/* protects buffer queue */
 	struct bufq_state	*sc_bufq;	/* buffer queue */
 	int			 sc_dtype;	/* disk type */
 };
@@ -85,6 +86,8 @@
 int	dk_close(struct dk_softc *, dev_t,
 		 int, int, struct lwp *);
 void	dk_strategy(struct dk_softc *, struct buf *);
+int	dk_discard(struct dk_softc *, dev_t, off_t, off_t);
+void	dk_start(struct dk_softc *, struct buf *);
 void	dk_done(struct dk_softc *, struct buf *);
 int	dk_size(struct dk_softc *, dev_t);
 int	dk_ioctl(struct dk_softc *, dev_t,
Index: sys/dev/ld.c
===================================================================
RCS file: /cvsroot/src/sys/dev/ld.c,v
retrieving revision 1.88
diff -u -r1.88 ld.c
--- sys/dev/ld.c	16 Aug 2015 17:32:31 -0000	1.88
+++ sys/dev/ld.c	16 Aug 2015 17:34:03 -0000
@@ -63,7 +63,7 @@
 static void	ldminphys(struct buf *bp);
 static bool	ld_suspend(device_t, const pmf_qual_t *);
 static bool	ld_shutdown(device_t, int);
-static void	ld_start(device_t);
+static int	ld_diskstart(device_t, struct buf *bp);
 static void	ld_iosize(device_t, int *);
 static int	ld_dumpblocks(device_t, void *, daddr_t, int);
 static void	ld_fake_geometry(struct ld_softc *);
@@ -90,7 +90,7 @@
 	.d_dump = lddump,
 	.d_psize = ldsize,
 	.d_discard = nodiscard,
-	.d_flag = D_DISK
+	.d_flag = D_DISK | D_MPSAFE
 };
 
 const struct cdevsw ld_cdevsw = {
@@ -105,7 +105,7 @@
 	.d_mmap = nommap,
 	.d_kqfilter = nokqfilter,
 	.d_discard = nodiscard,
-	.d_flag = D_DISK
+	.d_flag = D_DISK | D_MPSAFE
 };
 
 static struct	dkdriver lddkdriver = {
@@ -114,7 +114,7 @@
 	.d_strategy = ldstrategy,
 	.d_iosize = ld_iosize,
 	.d_minphys  = ldminphys,
-	.d_diskstart = ld_start,
+	.d_diskstart = ld_diskstart,
 	.d_dumpblocks = ld_dumpblocks,
 	.d_lastclose = ld_lastclose
 };
@@ -404,55 +404,28 @@
 	return dk_strategy(dksc, bp);
 }
 
-static void
-ld_start(device_t dev)
+static int
+ld_diskstart(device_t dev, struct buf *bp)
 {
 	struct ld_softc *sc = device_private(dev);
-	struct dk_softc *dksc = &sc->sc_dksc;
-	struct buf *bp;
 	int error;
 
+	if (sc->sc_queuecnt >= sc->sc_maxqueuecnt)
+		return EAGAIN;
+
 	mutex_enter(&sc->sc_mutex);
 
-	while (sc->sc_queuecnt < sc->sc_maxqueuecnt) {
-		/* See if there is work to do. */
-		if ((bp = bufq_peek(dksc->sc_bufq)) == NULL)
-			break;
-
-		disk_busy(&dksc->sc_dkdev);
-		sc->sc_queuecnt++;
-
-		if (__predict_true((error = (*sc->sc_start)(sc, bp)) == 0)) {
-			/*
-			 * The back-end is running the job; remove it from
-			 * the queue.
-			 */
-			(void) bufq_get(dksc->sc_bufq);
-		} else  {
-			disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ));
-			sc->sc_queuecnt--;
-			if (error == EAGAIN) {
-				/*
-				 * Temporary resource shortage in the
-				 * back-end; just defer the job until
-				 * later.
-				 *
-				 * XXX We might consider a watchdog timer
-				 * XXX to make sure we are kicked into action.
-				 */
-				break;
-			} else {
-				(void) bufq_get(dksc->sc_bufq);
-				bp->b_error = error;
-				bp->b_resid = bp->b_bcount;
-				mutex_exit(&sc->sc_mutex);
-				biodone(bp);
-				mutex_enter(&sc->sc_mutex);
-			}
-		}
+	if (sc->sc_queuecnt >= sc->sc_maxqueuecnt)
+		error = EAGAIN;
+	else {
+		error = (*sc->sc_start)(sc, bp);
+		if (error == 0)
+			sc->sc_queuecnt++;
 	}
 
 	mutex_exit(&sc->sc_mutex);
+
+	return error;
 }
 
 void
@@ -469,7 +442,7 @@
 			cv_broadcast(&sc->sc_drain);
 		}
 		mutex_exit(&sc->sc_mutex);
-		ld_start(dksc->sc_dev);
+		dk_start(dksc, NULL);
 	} else
 		mutex_exit(&sc->sc_mutex);
 }
Index: sys/dev/cgd.c
===================================================================
RCS file: /cvsroot/src/sys/dev/cgd.c,v
retrieving revision 1.98
diff -u -r1.98 cgd.c
--- sys/dev/cgd.c	2 May 2015 08:00:08 -0000	1.98
+++ sys/dev/cgd.c	16 Aug 2015 17:34:03 -0000
@@ -104,7 +104,7 @@
 
 /* Internal Functions */
 
-static void	cgd_start(device_t);
+static int	cgd_diskstart(device_t, struct buf *);
 static void	cgdiodone(struct buf *);
 
 static int	cgd_ioctl_set(struct cgd_softc *, void *, struct lwp *);
@@ -121,7 +121,7 @@
         .d_close = cgdclose,
         .d_strategy = cgdstrategy,
         .d_iosize = NULL,
-        .d_diskstart = cgd_start,
+        .d_diskstart = cgd_diskstart,
         .d_dumpblocks = NULL,
         .d_lastclose = NULL
 };
@@ -379,79 +379,65 @@
 	}
 }
 
-static void
-cgd_start(device_t dev)
+static int
+cgd_diskstart(device_t dev, struct buf *bp)
 {
 	struct	cgd_softc *cs = device_private(dev);
 	struct	dk_softc *dksc = &cs->sc_dksc;
-	struct	buf *bp, *nbp;
-#ifdef DIAGNOSTIC
-	struct	buf *qbp;
-#endif
+	struct	buf *nbp;
 	void *	addr;
 	void *	newaddr;
 	daddr_t	bn;
 	struct	vnode *vp;
 
-	while ((bp = bufq_peek(dksc->sc_bufq)) != NULL) {
+	DPRINTF_FOLLOW(("cgd_diskstart(%p, %p)\n", dksc, bp));
 
-		DPRINTF_FOLLOW(("cgd_start(%p, %p)\n", dksc, bp));
-		disk_busy(&dksc->sc_dkdev);
+	bn = bp->b_rawblkno;
 
-		bn = bp->b_rawblkno;
+	/*
+	 * We attempt to allocate all of our resources up front, so that
+	 * we can fail quickly if they are unavailable.
+	 */
+	nbp = getiobuf(cs->sc_tvn, false);
+	if (nbp == NULL)
+		return EAGAIN;
 
-		/*
-		 * We attempt to allocate all of our resources up front, so that
-		 * we can fail quickly if they are unavailable.
-		 */
-		nbp = getiobuf(cs->sc_tvn, false);
-		if (nbp == NULL) {
-			disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ));
-			break;
+	/*
+	 * If we are writing, then we need to encrypt the outgoing
+	 * block into a new block of memory.
+	 */
+	newaddr = addr = bp->b_data;
+	if ((bp->b_flags & B_READ) == 0) {
+		newaddr = cgd_getdata(dksc, bp->b_bcount);
+		if (!newaddr) {
+			putiobuf(nbp);
+			return EAGAIN;
 		}
+		cgd_cipher(cs, newaddr, addr, bp->b_bcount, bn,
+		    DEV_BSIZE, CGD_CIPHER_ENCRYPT);
+	}
 
-		/*
-		 * If we are writing, then we need to encrypt the outgoing
-		 * block into a new block of memory.
-		 */
-		newaddr = addr = bp->b_data;
-		if ((bp->b_flags & B_READ) == 0) {
-			newaddr = cgd_getdata(dksc, bp->b_bcount);
-			if (!newaddr) {
-				putiobuf(nbp);
-				disk_unbusy(&dksc->sc_dkdev, 0, (bp->b_flags & B_READ));
-				break;
-			}
-			cgd_cipher(cs, newaddr, addr, bp->b_bcount, bn,
-			    DEV_BSIZE, CGD_CIPHER_ENCRYPT);
-		}
-		/* we now have all needed resources to process this buf */
-#ifdef DIAGNOSTIC
-		qbp = bufq_get(dksc->sc_bufq);
-		KASSERT(bp == qbp);
-#else
-		(void)bufq_get(dksc->sc_bufq);
-#endif
-		nbp->b_data = newaddr;
-		nbp->b_flags = bp->b_flags;
-		nbp->b_oflags = bp->b_oflags;
-		nbp->b_cflags = bp->b_cflags;
-		nbp->b_iodone = cgdiodone;
-		nbp->b_proc = bp->b_proc;
-		nbp->b_blkno = bn;
-		nbp->b_bcount = bp->b_bcount;
-		nbp->b_private = bp;
-
-		BIO_COPYPRIO(nbp, bp);
-
-		if ((nbp->b_flags & B_READ) == 0) {
-			vp = nbp->b_vp;
-			mutex_enter(vp->v_interlock);
-			vp->v_numoutput++;
-			mutex_exit(vp->v_interlock);
-		}
-		VOP_STRATEGY(cs->sc_tvn, nbp);
+	nbp->b_data = newaddr;
+	nbp->b_flags = bp->b_flags;
+	nbp->b_oflags = bp->b_oflags;
+	nbp->b_cflags = bp->b_cflags;
+	nbp->b_iodone = cgdiodone;
+	nbp->b_proc = bp->b_proc;
+	nbp->b_blkno = bn;
+	nbp->b_bcount = bp->b_bcount;
+	nbp->b_private = bp;
+
+	BIO_COPYPRIO(nbp, bp);
+
+	if ((nbp->b_flags & B_READ) == 0) {
+		vp = nbp->b_vp;
+		mutex_enter(vp->v_interlock);
+		vp->v_numoutput++;
+		mutex_exit(vp->v_interlock);
 	}
+	VOP_STRATEGY(cs->sc_tvn, nbp);
+
+	return 0;
 }
 
 static void
@@ -460,7 +446,6 @@
 	struct	buf *obp = nbp->b_private;
 	struct	cgd_softc *cs = getcgd_softc(obp->b_dev);
 	struct	dk_softc *dksc = &cs->sc_dksc;
-	int s;
 
 	KDASSERT(cs);
 
@@ -492,16 +477,14 @@
 
 	putiobuf(nbp);
 
 	/* Request is complete for whatever reason */
 	obp->b_resid = 0;
 	if (obp->b_error != 0)
 		obp->b_resid = obp->b_bcount;
-	s = splbio();
-	disk_unbusy(&dksc->sc_dkdev, obp->b_bcount - obp->b_resid,
-	    (obp->b_flags & B_READ));
-	biodone(obp);
-	cgd_start(dksc->sc_dev);
-	splx(s);
+
+	dk_done(dksc, obp);
+	/* Restart the queue: a buffer may be parked after EAGAIN. */
+	dk_start(dksc, NULL);
 }
 
 /* XXX: we should probably put these into dksubr.c, mostly */
Index: sys/arch/xen/xen/xbd_xenbus.c
===================================================================
RCS file: /cvsroot/src/sys/arch/xen/xen/xbd_xenbus.c,v
retrieving revision 1.71
diff -u -r1.71 xbd_xenbus.c
--- sys/arch/xen/xen/xbd_xenbus.c	2 May 2015 08:00:08 -0000	1.71
+++ sys/arch/xen/xen/xbd_xenbus.c	16 Aug 2015 17:34:03 -0000
@@ -40,7 +40,7 @@
  * - initiate request: xbdread/write/open/ioctl/..
  * - depending on operation, it is handled directly by disk(9) subsystem or
  *   goes through physio(9) first.
- * - the request is ultimately processed by xbdstart() that prepares the
+ * - the request is ultimately processed by xbd_diskstart() that prepares the
  *   xbd requests, post them in the ring I/O queue, then signal the backend.
  *
  * When a response is available in the queue, the backend signals the frontend
@@ -168,7 +168,7 @@
 static bool xbd_xenbus_resume(device_t, const pmf_qual_t *);
 
 static int  xbd_handler(void *);
-static void xbdstart(device_t);
+static int  xbd_diskstart(device_t, struct buf *);
 static void xbd_backend_changed(void *, XenbusState);
 static void xbd_connect(struct xbd_xenbus_softc *);
 
@@ -223,7 +223,7 @@
 	.d_minphys = xbdminphys,
 	.d_open = xbdopen,
 	.d_close = xbdclose,
-	.d_diskstart = xbdstart,
+	.d_diskstart = xbd_diskstart,
 };
 
 static int
@@ -697,7 +697,7 @@
 			bp->b_resid = bp->b_bcount;
 			goto next;
 		}
-		/* b_resid was set in xbdstart */
+		/* b_resid was set in xbd_diskstart */
 next:
 		if (bp->b_data != xbdreq->req_data)
 			xbd_unmap_align(xbdreq);
@@ -720,7 +720,7 @@
 	if (sc->sc_xbdreq_wait)
 		wakeup(&sc->sc_xbdreq_wait);
 	else
-		xbdstart(sc->sc_dksc.sc_dev);
+		dk_start(&sc->sc_dksc, NULL);
 	return 1;
 }
 
@@ -918,156 +918,111 @@
 	return dk_dump(&sc->sc_dksc, dev, blkno, va, size);
 }
 
-static void
-xbdstart(device_t self)
+static int
+xbd_diskstart(device_t self, struct buf *bp)
 {
 	struct xbd_xenbus_softc *sc = device_private(self);
-	struct dk_softc *dksc = &sc->sc_dksc;
-	struct buf *bp;
-#ifdef DIAGNOSTIC
-	struct  buf *qbp; 
-#endif
 	struct xbd_req *xbdreq;
 	blkif_request_t *req;
 	size_t bcount, off;
 	paddr_t ma;
 	vaddr_t va;
 	int nsects, nbytes, seg;
-	int notify;
-
-	while ((bp = bufq_peek(dksc->sc_bufq)) != NULL) {
+	int notify, error = 0;
 
-		DPRINTF(("xbdstart(%p): b_bcount = %ld\n",
-		    bp, (long)bp->b_bcount));
+	DPRINTF(("xbd_diskstart(%p): b_bcount = %ld\n",
+	    bp, (long)bp->b_bcount));
 
-		if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
-			bp->b_error = EIO;
-			goto err;
-		}
+	if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
+		error = EIO;
+		goto err;
+	}
 
-		if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) {
-			/* invalid block number */
-			bp->b_error = EINVAL;
-			goto err;
-		}
+	if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) {
+		/* invalid block number */
+		error = EINVAL;
+		goto err;
+	}
 
-		if (bp->b_rawblkno == sc->sc_xbdsize) {
-			/* at end of disk; return short read */
-			bp->b_resid = bp->b_bcount;
-#ifdef DIAGNOSTIC 
-			qbp = bufq_get(dksc->sc_bufq);
-			KASSERT(bp == qbp);
-#else
-			(void)bufq_get(dksc->sc_bufq);
-#endif
-			biodone(bp);
-			continue;
-		}
+	if (__predict_false(
+	    sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
+		/* device is suspended, do not consume buffer */
+		DPRINTF(("%s: (xbd_diskstart) device suspended\n",
+		    sc->sc_dksc.sc_xname));
+		error = EAGAIN;
+		goto out;
+	}
 
-		if (__predict_false(
-		    sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
-			/* device is suspended, do not consume buffer */
-			DPRINTF(("%s: (xbdstart) device suspended\n",
-			    device_xname(sc->sc_dksc.sc_dev)));
-			goto out;
-		}
+	if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) {
+		DPRINTF(("xbd_diskstart: ring_full\n"));
+		error = EAGAIN;
+		goto out;
+	}
 
-		if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) {
-			DPRINTF(("xbdstart: ring_full\n"));
-			goto out;
-		}
+	xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
+	if (__predict_false(xbdreq == NULL)) {
+		DPRINTF(("xbd_diskstart: no req\n"));
+		error = EAGAIN;
+		goto out;
+	}
 
-		xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
-		if (__predict_false(xbdreq == NULL)) {
-			DPRINTF(("xbdstart: no req\n"));
+	xbdreq->req_bp = bp;
+	xbdreq->req_data = bp->b_data;
+	if ((vaddr_t)bp->b_data & (XEN_BSIZE - 1)) {
+		if (__predict_false(xbd_map_align(xbdreq) != 0)) {
+			DPRINTF(("xbd_diskstart: no align\n"));
+			error = EAGAIN;
 			goto out;
 		}
+	}
 
-		xbdreq->req_bp = bp;
-		xbdreq->req_data = bp->b_data;
-		if ((vaddr_t)bp->b_data & (XEN_BSIZE - 1)) {
-			if (__predict_false(xbd_map_align(xbdreq) != 0)) {
-				DPRINTF(("xbdstart: no align\n"));
-				goto out;
-			}
-		}
-		/* now we're sure we'll send this buf */
-#ifdef DIAGNOSTIC 
-		qbp = bufq_get(dksc->sc_bufq);
-		KASSERT(bp == qbp);
-#else
-		(void)bufq_get(dksc->sc_bufq);
-#endif
-		disk_busy(&dksc->sc_dkdev);
-
-		SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
-		req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
-		req->id = xbdreq->req_id;
-		req->operation =
-		    bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
-		req->sector_number = bp->b_rawblkno;
-		req->handle = sc->sc_handle;
-
-		va = (vaddr_t)xbdreq->req_data & ~PAGE_MASK;
-		off = (vaddr_t)xbdreq->req_data & PAGE_MASK;
-		if (bp->b_rawblkno + bp->b_bcount / DEV_BSIZE >=
-		    sc->sc_xbdsize) {
-			bcount = (sc->sc_xbdsize - bp->b_rawblkno) * DEV_BSIZE;
-			bp->b_resid = bp->b_bcount - bcount;
-		} else {
-			bcount = bp->b_bcount;
-			bp->b_resid = 0;
-		}
-		if (bcount > XBD_MAX_XFER) {
-			bp->b_resid += bcount - XBD_MAX_XFER;
-			bcount = XBD_MAX_XFER;
-		}
-		for (seg = 0; bcount > 0;) {
-			pmap_extract_ma(pmap_kernel(), va, &ma);
-			KASSERT((ma & (XEN_BSIZE - 1)) == 0);
-			if (bcount > PAGE_SIZE - off)
-				nbytes = PAGE_SIZE - off;
-			else
-				nbytes = bcount;
-			nsects = nbytes >> XEN_BSHIFT;
-			req->seg[seg].first_sect = off >> XEN_BSHIFT;
-			req->seg[seg].last_sect =
-			    (off >> XEN_BSHIFT) + nsects - 1;
-			KASSERT(req->seg[seg].first_sect <=
-			    req->seg[seg].last_sect);
-			KASSERT(req->seg[seg].last_sect < 8);
-			if (__predict_false(xengnt_grant_access(
-			    sc->sc_xbusd->xbusd_otherend_id, ma,
-			    (bp->b_flags & B_READ) == 0,
-			    &xbdreq->req_gntref[seg])))
-				panic("xbdstart: xengnt_grant_access"); /* XXX XXX !!! */
-			req->seg[seg].gref = xbdreq->req_gntref[seg];
-			seg++;
-			KASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
-			va += PAGE_SIZE;
-			off = 0;
-			bcount -= nbytes;
-		}
-		xbdreq->req_nr_segments = req->nr_segments = seg;
-		sc->sc_ring.req_prod_pvt++;
+	SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
+	req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
+	req->id = xbdreq->req_id;
+	req->operation =
+	    bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
+	req->sector_number = bp->b_rawblkno;
+	req->handle = sc->sc_handle;
+
+	va = (vaddr_t)xbdreq->req_data & ~PAGE_MASK;
+	off = (vaddr_t)xbdreq->req_data & PAGE_MASK;
+	bcount = bp->b_bcount;
+	bp->b_resid = 0;
+	for (seg = 0; bcount > 0;) {
+		pmap_extract_ma(pmap_kernel(), va, &ma);
+		KASSERT((ma & (XEN_BSIZE - 1)) == 0);
+		if (bcount > PAGE_SIZE - off)
+			nbytes = PAGE_SIZE - off;
+		else
+			nbytes = bcount;
+		nsects = nbytes >> XEN_BSHIFT;
+		req->seg[seg].first_sect = off >> XEN_BSHIFT;
+		req->seg[seg].last_sect =
+		    (off >> XEN_BSHIFT) + nsects - 1;
+		KASSERT(req->seg[seg].first_sect <=
+		    req->seg[seg].last_sect);
+		KASSERT(req->seg[seg].last_sect < 8);
+		if (__predict_false(xengnt_grant_access(
+		    sc->sc_xbusd->xbusd_otherend_id, ma,
+		    (bp->b_flags & B_READ) == 0,
+		    &xbdreq->req_gntref[seg])))
+			panic("xbd_diskstart: xengnt_grant_access"); /* XXX XXX !!! */
+		req->seg[seg].gref = xbdreq->req_gntref[seg];
+		seg++;
+		KASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
+		va += PAGE_SIZE;
+		off = 0;
+		bcount -= nbytes;
 	}
+	xbdreq->req_nr_segments = req->nr_segments = seg;
+	sc->sc_ring.req_prod_pvt++;
 
 out:
 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
 	if (notify)
 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
-	return;
-
 err:
-#ifdef DIAGNOSTIC 
-	qbp = bufq_get(dksc->sc_bufq);
-	KASSERT(bp == qbp);
-#else
-	(void)bufq_get(dksc->sc_bufq);
-#endif
-	bp->b_resid = bp->b_bcount;
-	biodone(bp);
-	return;
+	return error;
 }
 
 static int
