From: Andreas Schwab <schwab@issan.informatik.uni-dortmund.de>
Date: Mon, 1 Sep 97 10:55:21 +0200
To: linux-m68k@phil.uni-sb.de
In-Reply-To: Bernd Harries's message of Mon, 01 Sep 97 10:00:34 +0200
Subject: Re: L68K: kernel_unmap() ???? 
X-Yow: It's NO USE..  I've gone to ``CLUB MED''!!
References: <9709010800.AA18595@asrv01.atlas.de>
Sender: owner-linux-m68k@phil.uni-sb.de
Reply-To: linux-m68k@phil.uni-sb.de

Bernd Harries <harries@asrv01.atlas.de> writes:

|> Is there a function to revert kernel_map()s? What would modules have
|> to do in unload situations?

It's a bit problematic with the current kernel_map, because it hands
out monotonically increasing virtual addresses, and unmapping something
in between would leave a hole that is never reused.  Thus you could
eventually run out of address space.  We probably need some more
sophisticated handling, but even then there would still be a risk of
fragmentation.
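
To illustrate what such handling could look like (just a userspace
sketch, not kernel code; vrange, vm_take and vm_give are names made up
for the example): keep the unused virtual space in a sorted free list,
let kernel_map carve its range out of the first fitting hole, and let
a future kernel_unmap give the range back, merging with its neighbours
so holes get reused:

/* Sketch of a free list of virtual address ranges; invented names. */
#include <stdio.h>
#include <stdlib.h>

struct vrange {
	unsigned long start, end;	/* covers [start, end) */
	struct vrange *next;		/* list sorted by start */
};

static struct vrange *vm_free;

/* Carve size bytes out of the first hole that is big enough. */
static unsigned long vm_take(unsigned long size)
{
	struct vrange **p, *r;

	for (p = &vm_free; (r = *p) != NULL; p = &r->next)
		if (r->end - r->start >= size) {
			unsigned long addr = r->start;
			r->start += size;
			if (r->start == r->end) {	/* hole fully used */
				*p = r->next;
				free(r);
			}
			return addr;
		}
	return 0;	/* virtual space exhausted */
}

/* Give [addr, addr+size) back, merging with both neighbours. */
static void vm_give(unsigned long addr, unsigned long size)
{
	struct vrange *prev = NULL, *r = vm_free, *n;

	while (r && r->start < addr) {
		prev = r;
		r = r->next;
	}
	if (prev && prev->end == addr) {
		prev->end = addr + size;	/* extend predecessor */
		n = prev;
	} else {
		n = malloc(sizeof(*n));
		if (!n)
			return;			/* sketch: no error handling */
		n->start = addr;
		n->end = addr + size;
		n->next = r;
		if (prev)
			prev->next = n;
		else
			vm_free = n;
	}
	if (r && n->end == r->start) {		/* absorb successor */
		n->end = r->end;
		n->next = r->next;
		free(r);
	}
}

int main(void)
{
	unsigned long a, b;

	vm_give(0xe0000000, 0x1000000);	/* seed: 16 MB at the safe place */
	a = vm_take(0x40000);
	b = vm_take(0x40000);
	vm_give(a, 0x40000);		/* punch a hole ... */
	printf("%s\n", vm_take(0x40000) == a ? "hole reused" : "leaked");
	(void) b;
	return 0;
}

Even with something like that, mappings of different sizes would still
fragment the space over time, which is the remaining risk I mean.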

|> BTW: Andreas' new kernel_map() performs much better on my Medusa than the old 
|> one. Maybe he or someone else should now post it here publicly.

Nice to hear; I didn't even test it (I don't have anything to map). :-)
Here is the patch:

----------------------------------------------------------------------
--- arch/m68k/mm/memory.c.~1~	Mon Jul 28 17:23:22 1997
+++ arch/m68k/mm/memory.c	Thu Aug 28 13:18:51 1997
@@ -704,131 +704,152 @@
 /* Map some physical address range into the kernel address space. The
  * code is copied and adapted from map_chunk().
  */
+/* Rewritten by Andreas Schwab to remove all races. */
 
-unsigned long kernel_map(unsigned long paddr, unsigned long size,
-			 int nocacheflag, unsigned long *memavailp )
+static inline pte_t *
+pte_alloc_kernel_map(pmd_t *pmd, unsigned long address,
+		     unsigned long *memavailp)
+{
+	address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
+	if (pmd_none(*pmd)) {
+		pte_t *page = kernel_page_table(memavailp);
+		if (pmd_none(*pmd)) {
+			if (page) {
+				pmd_set(pmd, page);
+				return page + address;
+			}
+			pmd_set(pmd, BAD_PAGETABLE);
+			return NULL;
+		}
+		if (memavailp)
+			panic("kernel_map: slept during init?!?");
+		cache_page((unsigned long) page);
+		free_page((unsigned long) page);
+	}
+	if (pmd_bad(*pmd)) {
+		printk("Bad pmd in pte_alloc_kernel_map: %08lx\n",
+		       pmd_val(*pmd));
+		pmd_set(pmd, BAD_PAGETABLE);
+		return NULL;
+	}
+	return (pte_t *) pmd_page(*pmd) + address;
+}
+
+static inline void
+kernel_map_pte(pte_t *pte, unsigned long address, unsigned long size,
+	       unsigned long phys_addr, pgprot_t prot)
+{
+	unsigned long end;
+
+	address &= ~PMD_MASK;
+	end = address + size;
+	if (end > PMD_SIZE)
+		end = PMD_SIZE;
+	do {
+		pte_val(*pte) = phys_addr + pgprot_val(prot);
+		address += PAGE_SIZE;
+		phys_addr += PAGE_SIZE;
+		pte++;
+	} while (address < end);
+}
+
+static inline int
+kernel_map_pmd (pmd_t *pmd, unsigned long address, unsigned long size,
+		unsigned long phys_addr, pgprot_t prot,
+		unsigned long *memavailp)
+{
+	unsigned long end;
+
+	address &= ~PGDIR_MASK;
+	end = address + size;
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+	phys_addr -= address;
+
+	if (CPU_IS_040_OR_060) {
+		do {
+			pte_t *pte = pte_alloc_kernel_map(pmd, address, memavailp);
+			if (!pte)
+				return -ENOMEM;
+			kernel_map_pte(pte, address, end - address,
+				       address + phys_addr, prot);
+			address = (address + PMD_SIZE) & PMD_MASK;
+			pmd++;
+		} while (address < end);
+	} else {
+		/* On the 68030 we use early termination page descriptors.
+		   Each one points to 64 pages (256K). */
+		int i = (address >> (PMD_SHIFT-4)) & 15;
+		do {
+			(&pmd_val(*pmd))[i++] = (address + phys_addr) | pgprot_val(prot);
+			address += PMD_SIZE / 16;
+		} while (address < end);
+	}
+	return 0;
+}
+
+unsigned long kernel_map(unsigned long phys_addr, unsigned long size,
+			 int cacheflag, unsigned long *memavailp)
 {
 #define STEP_SIZE	(256*1024)
 
 	static unsigned long vaddr = 0xe0000000; /* safe place */
-	unsigned long physaddr, retaddr;
-	pte_t *ktablep = NULL;
-	pmd_t *kpointerp;
-	pgd_t *page_dir;
-	int pindex;   /* index into pointer table */
-	int prot;
-	
-	/* Round down 'paddr' to 256 KB and adjust size */
-	physaddr = paddr & ~(STEP_SIZE-1);
-	size += paddr - physaddr;
-	retaddr = vaddr + (paddr - physaddr);
-	paddr = physaddr;
+	unsigned long retaddr, from, end;
+	pgd_t *dir;
+	pgprot_t prot;
+
+	/* Round down 'phys_addr' to 256 KB and adjust size */
+	size += phys_addr & (STEP_SIZE-1);
+	retaddr = vaddr + (phys_addr & (STEP_SIZE-1));
+	phys_addr &= ~(STEP_SIZE-1);
 	/* Round up the size to 256 KB. It doesn't hurt if too much is
-	 * mapped... */
+	   mapped... */
 	size = (size + STEP_SIZE - 1) & ~(STEP_SIZE-1);
+	from = vaddr;
+	/* Claim the address space immediately, we may be sleeping. */
+	vaddr += size;
+	end = vaddr;
 
 	if (CPU_IS_040_OR_060) {
-		prot = _PAGE_PRESENT | _PAGE_GLOBAL040;
-		switch( nocacheflag ) {
-		  case KERNELMAP_FULL_CACHING:
-			prot |= _PAGE_CACHE040;
+		pgprot_val(prot) = (_PAGE_PRESENT | _PAGE_GLOBAL040 |
+				    _PAGE_ACCESSED | _PAGE_DIRTY);
+		switch (cacheflag) {
+		case KERNELMAP_FULL_CACHING:
+			pgprot_val(prot) |= _PAGE_CACHE040;
 			break;
-		  case KERNELMAP_NOCACHE_SER:
-		  default:
-			prot |= _PAGE_NOCACHE_S;
+		case KERNELMAP_NOCACHE_SER:
+		default:
+			pgprot_val(prot) |= _PAGE_NOCACHE_S;
 			break;
-		  case KERNELMAP_NOCACHE_NONSER:
-			prot |= _PAGE_NOCACHE;
+		case KERNELMAP_NOCACHE_NONSER:
+			pgprot_val(prot) |= _PAGE_NOCACHE;
 			break;
-		  case KERNELMAP_NO_COPYBACK:
-			prot |= _PAGE_CACHE040W;
-			/* prot |= 0; */
+		case KERNELMAP_NO_COPYBACK:
+			pgprot_val(prot) |= _PAGE_CACHE040W;
 			break;
 		}
 	} else
-		prot = _PAGE_PRESENT |
-			   ((nocacheflag == KERNELMAP_FULL_CACHING ||
-				 nocacheflag == KERNELMAP_NO_COPYBACK) ? 0 : _PAGE_NOCACHE030);
-	
-	page_dir = pgd_offset_k(vaddr);
-	if (pgd_present(*page_dir)) {
-		kpointerp = (pmd_t *)pgd_page(*page_dir);
-		pindex = (vaddr >> 18) & 0x7f;
-		if (pindex != 0 && CPU_IS_040_OR_060) {
-			if (pmd_present(*kpointerp))
-				ktablep = (pte_t *)pmd_page(*kpointerp);
-			else {
-				ktablep = kernel_page_table (memavailp);
-				/* Make entries invalid */
-				memset( ktablep, 0, sizeof(long)*PTRS_PER_PTE);
-				pmd_set(kpointerp,ktablep);
-			}
-			ktablep += (pindex & 15)*64;
-		}
-	}
-	else {
-		/* we need a new pointer table */
-		kpointerp = get_kpointer_table ();
-		pgd_set(page_dir, (pmd_t *)kpointerp);
-		memset( kpointerp, 0, PTRS_PER_PMD*sizeof(pmd_t));
-		pindex = 0;
-	}
-
-	for (physaddr = paddr; physaddr < paddr + size; vaddr += STEP_SIZE) {
-
-		if (pindex > 127) {
-			/* we need a new pointer table */
-			kpointerp = get_kpointer_table ();
-			pgd_set(pgd_offset_k(vaddr), (pmd_t *)kpointerp);
-			memset( kpointerp, 0, PTRS_PER_PMD*sizeof(pmd_t));
-			pindex = 0;
-		}
-
-		if (CPU_IS_040_OR_060) {
-			int i;
-			unsigned long ktable;
-
-			/*
-			 * 68040, use page tables pointed to by the
-			 * kernel pointer table.
-			 */
-
-			if ((pindex & 15) == 0) {
-				/* Need new page table every 4M on the '040 */
-				ktablep = kernel_page_table (memavailp);
-				/* Make entries invalid */
-				memset( ktablep, 0, sizeof(long)*PTRS_PER_PTE);
-			}
-
-			ktable = VTOP(ktablep);
-
-			/*
-			 * initialize section of the page table mapping
-			 * this 1M portion.
-			 */
-			for (i = 0; i < 64; i++) {
-				pte_val(*ktablep++) = physaddr | prot;
-				physaddr += PAGE_SIZE;
-			}
-
-			/*
-			 * make the kernel pointer table point to the
-			 * kernel page table.
-			 */
-
-			((unsigned long *)kpointerp)[pindex++] = ktable | _PAGE_TABLE;
-
-		} else {
-			/*
-			 * 68030, use early termination page descriptors.
-			 * Each one points to 64 pages (256K).
-			 */
-			((unsigned long *)kpointerp)[pindex++] = physaddr | prot;
-			physaddr += 64 * PAGE_SIZE;
+		pgprot_val(prot) = (_PAGE_PRESENT | _PAGE_ACCESSED |
+				    _PAGE_DIRTY |
+				    ((cacheflag == KERNELMAP_FULL_CACHING ||
+				      cacheflag == KERNELMAP_NO_COPYBACK)
+				     ? 0 : _PAGE_NOCACHE030));
+
+	phys_addr -= from;
+	dir = pgd_offset_k(from);
+	while (from < end) {
+		pmd_t *pmd = pmd_alloc_kernel(dir, from);
+
+		if (kernel_map_pmd(pmd, from, end - from, phys_addr + from,
+				   prot, memavailp)) {
+			printk(KERN_ERR "kernel_map: out of memory\n");
+			return 0UL;
 		}
+		from = (from + PGDIR_SIZE) & PGDIR_MASK;
+		dir++;
 	}
 
-	return( retaddr );
+	return retaddr;
 }
 
 
----------------------------------------------------------------------
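
As for the unload question: with this patch a driver still just calls
kernel_map and has no way to give the range back.  A hedged example of
what the calling side looks like (2.1-style module, assuming the usual
declarations from <asm/pgtable.h>; the board address 0xfe000000 and
the 1 MB size are invented for the example):

#include <linux/module.h>
#include <linux/errno.h>
#include <asm/pgtable.h>	/* kernel_map(), KERNELMAP_* */

static unsigned long board_base;	/* virtual base of the registers */

int init_module(void)
{
	/* Map 1 MB of (made-up) board space uncached/serialized.
	   memavailp is NULL since we run long after mem_init and
	   are allowed to sleep. */
	board_base = kernel_map(0xfe000000, 0x100000,
				KERNELMAP_NOCACHE_SER, NULL);
	if (!board_base)
		return -ENOMEM;
	return 0;
}

void cleanup_module(void)
{
	/* Nothing we can do yet: a real kernel_unmap would have to
	   clear the ptes, flush the ATC and return the virtual range
	   to something like the free list sketched above. */
}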

-- 
Andreas Schwab                                      "And now for something
schwab@issan.informatik.uni-dortmund.de              completely different"
