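Add PCI Express hotplug support to the PCI-PCI bridge driver (ppb),
adapted from OpenBSD.  The bridge now takes its own interrupt and queues
insert/remove events to a workqueue.  On insertion it assigns BARs, bus
numbers and the interrupt line to the new device and rescans the bus; on
removal it detaches the devices on the bus.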

Index: pci.c
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/pci.c,v
retrieving revision 1.118
diff -u -r1.118 pci.c
--- pci.c	12 Jun 2008 22:44:47 -0000	1.118
+++ pci.c	23 Sep 2008 02:17:53 -0000
@@ -311,6 +311,7 @@
 	pa.pa_tag = tag;
 	pa.pa_id = id;
 	pa.pa_class = class;
+	pa.pa_bridgetag = sc->sc_bridgetag;
 
 	/*
 	 * Set up memory, I/O enable, and PCI command flags
@@ -756,10 +757,8 @@
 	int offset;
 	pcireg_t value;
 
-	if (!pci_get_capability(pc, tag, PCI_CAP_PWRMGMT, &offset, &value)) {
-		printf("pci_set_powerstate not supported\n");
+	if (!pci_get_capability(pc, tag, PCI_CAP_PWRMGMT, &offset, &value))
 		return EOPNOTSUPP;
-	}
 
 	return pci_set_powerstate_int(pc, tag, state, offset, value);
 }
Index: pcivar.h
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/pcivar.h,v
retrieving revision 1.83
diff -u -r1.83 pcivar.h
--- pcivar.h	22 Jul 2008 04:52:19 -0000	1.83
+++ pcivar.h	23 Sep 2008 02:17:53 -0000
@@ -104,6 +104,8 @@
 	pcitag_t	pa_tag;
 	pcireg_t	pa_id, pa_class;
 
+	pcitag_t	*pa_bridgetag;
+
 	/*
 	 * Interrupt information.
 	 *
Index: ppb.c
===================================================================
RCS file: /cvsroot/src/sys/dev/pci/ppb.c,v
retrieving revision 1.39
diff -u -r1.39 ppb.c
--- ppb.c	3 May 2008 05:44:06 -0000	1.39
+++ ppb.c	23 Sep 2008 02:17:53 -0000
@@ -34,25 +34,52 @@
 __KERNEL_RCSID(0, "$NetBSD: ppb.c,v 1.39 2008/05/03 05:44:06 cegger Exp $");
 
 #include <sys/param.h>
+#include <sys/proc.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/device.h>
+#include <sys/workqueue.h>
 
 #include <dev/pci/pcireg.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/ppbreg.h>
 #include <dev/pci/pcidevs.h>
 
+#include "locators.h"
+
+extern int pci_enumerate_bus(struct pci_softc *, const int *,
+    int (*)(struct pci_attach_args *), struct pci_attach_args *);
+
+struct ppb_work {
+	struct work	pw_work;	/* first: cast to/from struct work */
+	enum ppb_event {
+		PPB_EV_INSERT,		/* device inserted */
+		PPB_EV_REMOVE		/* device removed */
+	}		pw_event;	/* which event this item stands for */
+};
+
 struct ppb_softc {
 	device_t sc_dev;		/* generic device glue */
 	pci_chipset_tag_t sc_pc;	/* our PCI chipset... */
 	pcitag_t sc_tag;		/* ...and tag. */
 
+	device_t sc_pcibus;		/* child bus from config_found_ia() */
+	void *sc_ih;			/* handle from pci_intr_establish() */
+	int sc_pcie_off;		/* PCI Express capability offset */
+	struct workqueue *sc_wq;	/* runs ppb_event_worker() */
+	struct ppb_work	sc_pw_insert, sc_pw_remove;	/* one per event type */
+
 	pcireg_t sc_pciconfext[48];
 };
 
 static bool		ppb_resume(device_t PMF_FN_PROTO);
 static bool		ppb_suspend(device_t PMF_FN_PROTO);
+static int		ppb_intr(void *);
+static void		ppb_event_worker(struct work *, void *);
+static void		ppb_rescan(struct ppb_softc *);
+static int		ppb_fixup(struct pci_attach_args *);
+static int		ppb_fixup_type0(pci_chipset_tag_t, pcitag_t, pcitag_t);
+static int		ppb_fixup_type1(pci_chipset_tag_t, pcitag_t, pcitag_t);
 
 static int
 ppbmatch(device_t parent, cfdata_t match, void *aux)
@@ -72,29 +99,6 @@
 }
 
 static void
-ppb_fix_pcix(device_t self)
-{
-	struct ppb_softc *sc = device_private(self);
-	pcireg_t reg;
-	int off;
-
-	if (!pci_get_capability(sc->sc_pc, sc->sc_tag, PCI_CAP_PCIEXPRESS,
-				&off, &reg))
-		return; /* Not a PCIe device */
-
-	if ((reg & 0x000f0000) != 0x00010000) {
-		aprint_normal_dev(self, "unsupported PCI Express version\n");
-		return;
-	}
-	reg = pci_conf_read(sc->sc_pc, sc->sc_tag, off + 0x18);
-	if (reg & 0x003f) {
-		aprint_normal_dev(self, "disabling notification events\n");
-		reg &= ~0x003f;
-		pci_conf_write(sc->sc_pc, sc->sc_tag, off + 0x18, reg);
-	}
-}
-
-static void
 ppbattach(device_t parent, device_t self, void *aux)
 {
 	struct ppb_softc *sc = device_private(self);
@@ -103,6 +107,7 @@
 	struct pcibus_attach_args pba;
 	pcireg_t busdata;
 	char devinfo[256];
+	pcireg_t reg;
 
 	pci_devinfo(pa->pa_id, pa->pa_class, 0, devinfo, sizeof(devinfo));
 	aprint_normal(": %s (rev. 0x%02x)\n", devinfo,
@@ -112,6 +117,9 @@
 	sc->sc_pc = pc;
 	sc->sc_tag = pa->pa_tag;
 	sc->sc_dev = self;
+	sc->sc_wq = NULL;
+	sc->sc_pw_insert.pw_event = PPB_EV_INSERT;
+	sc->sc_pw_remove.pw_event = PPB_EV_REMOVE;
 
 	busdata = pci_conf_read(pc, pa->pa_tag, PPB_REG_BUSINFO);
 
@@ -120,7 +128,40 @@
 		return;
 	}
 
-	ppb_fix_pcix(self);
+	if (pci_get_capability(sc->sc_pc, sc->sc_tag, PCI_CAP_PCIEXPRESS,
+			       &sc->sc_pcie_off, &reg)) {
+		const char *intrstr;
+		pci_intr_handle_t ih;
+		pcireg_t csr;
+		int err;
+
+		err = workqueue_create(&sc->sc_wq, "pciehp",
+		    ppb_event_worker, sc, PRI_NONE, IPL_NONE, 0);
+		if (err) {
+			aprint_error_dev(self, "couldn't create workqueue\n");
+			goto nointr;
+		}
+
+		csr = pci_conf_read(sc->sc_pc, sc->sc_tag,
+		    sc->sc_pcie_off + 0x18);
+		csr |= 0x28;
+		pci_conf_write(sc->sc_pc, sc->sc_tag,
+		    sc->sc_pcie_off + 0x18, csr);
+
+		if (pci_intr_map(pa, &ih)) {
+			aprint_error_dev(self, "couldn't map interrupt\n");
+			goto nointr;
+		}
+ 		intrstr = pci_intr_string(pc, ih);
+		sc->sc_ih = pci_intr_establish(pc, ih, IPL_VM, ppb_intr, sc);
+		if (sc->sc_ih == NULL) {
+			aprint_error_dev(self,
+			    "couldn't establish interrupt\n");
+			goto nointr;
+		}
+		aprint_normal_dev(self, "interrupting at %s\n", intrstr);
+	}
+nointr:
 
 #if 0
 	/*
@@ -154,14 +195,23 @@
 	pba.pba_intrswiz = pa->pa_intrswiz;
 	pba.pba_intrtag = pa->pa_intrtag;
 
-	config_found_ia(self, "pcibus", &pba, pcibusprint);
+	sc->sc_pcibus = config_found_ia(self, "pcibus", &pba, pcibusprint);
 }
 
 static int
 ppbdetach(device_t self, int flags)
 {
+	struct ppb_softc *sc = device_private(self);
 	int rc;
 
+	if (sc->sc_ih) {
+		pci_intr_disestablish(sc->sc_pc, sc->sc_ih);
+		sc->sc_ih = NULL;
+	}
+	if (sc->sc_wq) {
+		workqueue_destroy(sc->sc_wq);
+		sc->sc_wq = NULL;
+	}
 	if ((rc = config_detach_children(self, flags)) != 0)
 		return rc;
 	pmf_device_deregister(self);
@@ -182,8 +232,6 @@
 			    sc->sc_pciconfext[(off - 0x40)/4]);
 	}
 
-	ppb_fix_pcix(dv);
-
 	return true;
 }
 
@@ -200,6 +248,239 @@
 	return true;
 }
 
+/* Interrupt handler: queue hotplug work, write status back, count events. */
+static int
+ppb_intr(void *opaque)
+{
+	struct ppb_softc *sc = opaque;
+	struct ppb_work *pw;
+	pcireg_t status;
+	int nevents = 0;
+
+	status = pci_conf_read(sc->sc_pc, sc->sc_tag, sc->sc_pcie_off + 0x18);
+	if (status & 0x00080000) {	/* hotplug status change */
+		if (status & 0x00400000) {
+			aprint_debug_dev(sc->sc_dev, "device inserted\n");
+			pw = &sc->sc_pw_insert;
+		} else {
+			aprint_debug_dev(sc->sc_dev, "device removed\n");
+			pw = &sc->sc_pw_remove;
+		}
+		/* NOTE(review): pw may still be queued from before. */
+		workqueue_enqueue(sc->sc_wq, (struct work *)pw, NULL);
+		nevents++;
+	}
+	if (status & 0x00040000) {	/* MRL sensor changed */
+		aprint_debug_dev(sc->sc_dev, "MRL sensor changed\n");
+		nevents++;
+	}
+	if (status & 0x00020000) {	/* power fault */
+		aprint_debug_dev(sc->sc_dev, "power fault\n");
+		nevents++;
+	}
+	if (status & 0x00010000) {	/* attention button */
+		aprint_debug_dev(sc->sc_dev, "attention button pressed\n");
+		nevents++;
+	}
+	if (nevents != 0)
+		pci_conf_write(sc->sc_pc, sc->sc_tag,
+		    sc->sc_pcie_off + 0x18, status);
+	return nevents;
+}
+
+/* Workqueue callback: act on one queued insert or remove event. */
+static void
+ppb_event_worker(struct work *work, void *opaque)
+{
+	struct ppb_softc *sc = opaque;
+	struct ppb_work *pw = (struct ppb_work *)work;
+
+	if (sc->sc_pcibus == NULL)	/* no child bus was attached */
+		return;
+	switch (pw->pw_event) {
+	case PPB_EV_INSERT:
+		(void)kpause("ppbhp", false, hz, NULL);	/* plain 1s delay */
+		ppb_rescan(sc);
+		break;
+	case PPB_EV_REMOVE:
+		config_detach_children(sc->sc_pcibus, DETACH_FORCE);
+		break;
+	}
+}
+
+/* Run ppb_fixup over the child PCI bus, then rescan that bus. */
+static void
+ppb_rescan(struct ppb_softc *sc)
+{
+	static const int anyloc[PCICF_NLOCS] = {
+		PCICF_DEV_DEFAULT, PCICF_FUNCTION_DEFAULT
+	};
+	device_t bus = sc->sc_pcibus;
+
+	KASSERT(sc->sc_pcibus != NULL);
+	pci_enumerate_bus(device_private(bus), anyloc, ppb_fixup, NULL);
+	pcirescan(bus, "pci", anyloc);
+}
+
+/* Enumeration callback: fix up one device according to its header type. */
+static int
+ppb_fixup(struct pci_attach_args *pa)
+{
+	pci_chipset_tag_t pc = pa->pa_pc;
+	pcitag_t tag = pa->pa_tag;
+	pcireg_t bhlcr = pci_conf_read(pc, tag, PCI_BHLC_REG);
+	int hdrtype = PCI_HDRTYPE_TYPE(bhlcr);
+
+	if (hdrtype == 0)
+		return ppb_fixup_type0(pc, tag, *pa->pa_bridgetag);
+	if (hdrtype == 1)
+		return ppb_fixup_type1(pc, tag, *pa->pa_bridgetag);
+
+	/* Any other header type is left untouched. */
+	return 0;
+}
+
+/*
+ * Assign the unconfigured BARs and a missing interrupt line of the header
+ * type 0 function `tag' from the windows and interrupt line of the bridge
+ * `bridgetag'.  Always returns 0.  The bridge's address ranges are assumed
+ * to be really available and not to overlap with other devices.
+ * NOTE(review): each call starts again at the window base, so two
+ * functions behind one bridge can be given the same addresses.
+ */
+static int
+ppb_fixup_type0(pci_chipset_tag_t pc, pcitag_t tag, pcitag_t bridgetag)
+{
+	pcireg_t blr, type, intr, csr;
+	int reg, line;
+	bus_addr_t base, io_base, io_limit, mem_base, mem_limit;
+	bus_size_t size, io_size, mem_size;
+
+	/* Figure out the I/O address range of the bridge. */
+	blr = pci_conf_read(pc, bridgetag, PPB_REG_IOSTATUS);
+	io_base = (blr & 0x000000f0) << 8;
+	io_limit = (blr & 0x0000f000) | 0x00000fff;
+	io_size = (io_limit > io_base) ? io_limit - io_base + 1 : 0;
+
+	/* Figure out the memory mapped I/O address range of the bridge. */
+	blr = pci_conf_read(pc, bridgetag, PPB_REG_MEM);
+	mem_base = (blr & 0x0000fff0) << 16;
+	mem_limit = (blr & 0xffff0000) | 0x000fffff;
+	mem_size = (mem_limit > mem_base) ? mem_limit - mem_base + 1 : 0;
+
+	/* Assign resources to the Base Address Registers. */
+	for (reg = PCI_MAPREG_START; reg < PCI_MAPREG_END; reg += 4) {
+		if (!pci_mapreg_probe(pc, tag, reg, &type))
+			continue;
+		if (pci_mapreg_info(pc, tag, reg, type, &base, &size, NULL))
+			goto next;
+		/* Skip BARs already set up; size 0 would break roundup(). */
+		if (base != 0 || size == 0)
+			goto next;
+		switch (type) {
+		case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT:
+		case PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT:
+			base = roundup(mem_base, size);
+			size += base - mem_base;
+			if (size > mem_size)
+				break;		/* does not fit in the window */
+			pci_conf_write(pc, tag, reg, base);
+			mem_base += size;
+			mem_size -= size;
+
+			csr = pci_conf_read(pc, tag, PCI_COMMAND_STATUS_REG);
+			csr |= PCI_COMMAND_MEM_ENABLE;
+			pci_conf_write(pc, tag, PCI_COMMAND_STATUS_REG, csr);
+			break;
+		case PCI_MAPREG_TYPE_IO:
+			base = roundup(io_base, size);
+			size += base - io_base;
+			if (size > io_size)
+				break;		/* does not fit in the window */
+			pci_conf_write(pc, tag, reg, base);
+			io_base += size;
+			io_size -= size;
+
+			csr = pci_conf_read(pc, tag, PCI_COMMAND_STATUS_REG);
+			csr |= PCI_COMMAND_IO_ENABLE;
+			pci_conf_write(pc, tag, PCI_COMMAND_STATUS_REG, csr);
+			break;
+		default:
+			break;
+		}
+next:
+		/*
+		 * A 64-bit BAR takes two registers; step over the upper half
+		 * even when the BAR was skipped, so it is never probed alone.
+		 */
+		if (type & PCI_MAPREG_MEM_TYPE_64BIT)
+			reg += 4;
+	}
+
+	/*
+	 * Fill in the interrupt line for platforms that need it.
+	 *
+	 * XXX We assume that the interrupt line matches the line used
+	 * by the PCI Express bridge.  This may not be true.
+	 */
+	intr = pci_conf_read(pc, tag, PCI_INTERRUPT_REG);
+	if (PCI_INTERRUPT_PIN(intr) != PCI_INTERRUPT_PIN_NONE &&
+	    PCI_INTERRUPT_LINE(intr) == 0) {
+		/* Get the interrupt line from our parent. */
+		intr = pci_conf_read(pc, bridgetag, PCI_INTERRUPT_REG);
+		line = PCI_INTERRUPT_LINE(intr);
+		intr = pci_conf_read(pc, tag, PCI_INTERRUPT_REG);
+		intr &= ~(PCI_INTERRUPT_LINE_MASK << PCI_INTERRUPT_LINE_SHIFT);
+		intr |= line << PCI_INTERRUPT_LINE_SHIFT;
+		pci_conf_write(pc, tag, PCI_INTERRUPT_REG, intr);
+	}
+
+	return 0;
+}
+
+/*
+ * Fix up the header type 1 function `tag' behind bridge `bridgetag' and
+ * the type 0 devices on the bus it is given.  Always returns 0.
+ */
+static int
+ppb_fixup_type1(pci_chipset_tag_t pc, pcitag_t tag, pcitag_t bridgetag)
+{
+	pcireg_t bhlcr, bir, csr, val;
+	int bus, dev, reg;
+
+	bir = pci_conf_read(pc, bridgetag, PPB_REG_BUSINFO);
+	if (PPB_BUSINFO_SUBORDINATE(bir) <= PPB_BUSINFO_SECONDARY(bir))
+		return 0;
+
+	bus = PPB_BUSINFO_SECONDARY(bir);
+	bir = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
+	bir &= (0xffU << 24);	/* unsigned: 0xff << 24 overflows int */
+	bir |= bus++;		/* low byte: parent's secondary bus */
+	bir |= (bus << 8);	/* next two bytes: that bus + 1 */
+	bir |= (bus << 16);
+	pci_conf_write(pc, tag, PPB_REG_BUSINFO, bir);
+
+	for (reg = PPB_REG_IOSTATUS; reg < PPB_REG_BRIDGECONTROL; reg += 4) {
+		val = pci_conf_read(pc, bridgetag, reg);
+		pci_conf_write(pc, tag, reg, val);
+	}
+
+	csr = pci_conf_read(pc, tag, PCI_COMMAND_STATUS_REG);
+	csr |= PCI_COMMAND_IO_ENABLE | PCI_COMMAND_MEM_ENABLE;
+	csr |= PCI_COMMAND_MASTER_ENABLE | PCI_COMMAND_INVALIDATE_ENABLE;
+	csr |= PCI_COMMAND_SERR_ENABLE;
+	pci_conf_write(pc, tag, PCI_COMMAND_STATUS_REG, csr);
+
+	for (dev = 0; dev < pci_bus_maxdevs(pc, bus); dev++) {
+		tag = pci_make_tag(pc, bus, dev, 0);
+		bhlcr = pci_conf_read(pc, tag, PCI_BHLC_REG);
+		if (PCI_HDRTYPE_TYPE(bhlcr) != 0)
+			continue;
+		ppb_fixup_type0(pc, tag, bridgetag);
+	}
+	return 0;
+}
+
 static void
 ppbchilddet(device_t self, device_t child)
 {
