From: Andreas Schwab <schwab@issan.informatik.uni-dortmund.de>
Date: Fri, 16 May 97 10:26:27 +0200
To: linux-m68k@phil.uni-sb.de
Subject: L68K: current in 2.1.37
X-Yow: In Newark the laundromats are open 24 hours a day!
Sender: owner-linux-m68k@phil.uni-sb.de
Reply-To: linux-m68k@phil.uni-sb.de

Hi!

This patch implements the permanent loading of current in register a2.  It
should be applied on top of patch-2.1.37.  I haven't run any performance
tests yet; this is more of a proof of concept.  I have chosen register a2
because it makes some things simpler, but the choice is not critical.  But
beware: changing the register invalidates the module interface in a way
that even CONFIG_MODVERSIONS cannot detect.

I have also tried the Intel way of using the stack pointer to calculate
current, but that made things worse: bitwise operations require a data
register, which makes the code both bigger and slower.  I'm still using
this trick to load current on kernel entry.

Btw., there were about 930 references to current_set in my kernel in
version 2.1.26 (you need to count the relocations, not only the undefined
symbol references).  On the other hand, there are more than 230 functions
that are big enough to already use up all address registers.  This
includes important functions like zeromap_page_range, generic_file_read,
rw_swap_page, brw_page, block_write, block_read, load_elf_binary.  They
will surely suffer from the missing register.

Andreas.

----------------------------------------------------------------------
table
`!"#$%&'()*+,-./0123456789:;<=>?
@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
--- arch/m68k/Makefile.~2~	Sat Apr 26 01:21:40 1997
+++ arch/m68k/Makefile	Thu May 15 13:13:47 1997
@@ -26,7 +26,7 @@
 
 LINKFLAGS = -T $(TOPDIR)/arch/m68k/vmlinux.lds
 
-CFLAGS := $(CFLAGS) -pipe -fno-strength-reduce
+CFLAGS += -pipe -fno-strength-reduce -ffixed-a2
 
 ifdef CONFIG_OPTIMIZE_040
 CFLAGS := $(CFLAGS) -m68040
--- arch/m68k/atari/ataints.c.~2~	Wed May 14 17:40:28 1997
+++ arch/m68k/atari/ataints.c	Thu May 15 13:21:19 1997
@@ -163,15 +163,19 @@
 
 #define	MFP_MK_BASE	"0xfa13"
 
-/* This must agree with head.S.  */
-#define ORIG_DO "0x20"
-#define FORMATVEC "0x2E"
-#define SR "0x28"
+/* This must agree with entry.S.  */
+#define ORIG_DO "0x24"
+#define FORMATVEC "0x32"
+#define SR "0x2C"
 #define SAVE_ALL				\
 	"clrl	%%sp@-;"    /* stk_adj */	\
 	"pea	-1:w;"	    /* orig d0 = -1 */	\
 	"movel	%%d0,%%sp@-;" /* d0 */		\
-	"moveml	%%d1-%%d5/%%a0-%%a1,%%sp@-"
+	"moveml	%%d1-%%d5/%%a0-%%a2,%%sp@-"
+#define GET_CURRENT(tmp) \
+	"movel	%%sp,"#tmp";" \
+	"andw	#-8192,"#tmp";" \
+	"movel	"#tmp",%%a2"
 
 #define	BUILD_SLOW_IRQ(n)						   \
 asmlinkage void IRQ_NAME(n);						   \
@@ -181,6 +185,7 @@
 SYMBOL_NAME_STR(atari_slow_irq_) #n "_handler:\t"			   \
 "	addql	#1,"SYMBOL_NAME_STR(local_irq_count)"\n"		   \
 	SAVE_ALL "\n"							   \
+	GET_CURRENT(%%d0) "\n"						   \
 "	andb	#~(1<<(" #n "&7)),"	/* mask this interrupt */	   \
 	"("MFP_MK_BASE"+(((" #n "&8)^8)>>2)+((" #n "&16)<<3)):w\n"	   \
 "	bfextu	%%sp@("SR"){#5,#3},%%d0\n" /* get old IPL from stack frame */ \
@@ -283,7 +288,8 @@
 	orw 	#0x700,%%sr		/* disable all interrupts */
 "SYMBOL_NAME_STR(atari_prio_irq_handler) ":\t
 	addql	#1,"SYMBOL_NAME_STR(local_irq_count)"\n"
-	SAVE_ALL "
+	SAVE_ALL "\n"
+	GET_CURRENT(%%d0) "
 	/* get vector number from stack frame and convert to source */
 	bfextu	%%sp@(" FORMATVEC "){#4,#10},%%d0
 	subw	#(0x40-8),%%d0
--- arch/m68k/kernel/entry.S.~4~	Fri May  9 21:45:56 1997
+++ arch/m68k/kernel/entry.S	Thu May 15 19:22:21 1997
@@ -33,12 +33,13 @@
  *	10(sp) - d5
  *	14(sp) - a0
  *	18(sp) - a1
- *	1C(sp) - d0
- *	20(sp) - orig_d0
- *	24(sp) - stack adjustment
- *	28(sp) - sr
- *	2A(sp) - pc
- *	2E(sp) - format & vector
+ *	1C(sp) - a2
+ *	20(sp) - d0
+ *	24(sp) - orig_d0
+ *	28(sp) - stack adjustment
+ *	2C(sp) - sr
+ *	2E(sp) - pc
+ *	32(sp) - format & vector
  */
 
 /*
@@ -47,6 +48,11 @@
  *               number 0 in the 'current_set' list.
  */
 
+/*
+ * 97/05/14 Andreas: Register %a2 is now set to the current task throughout
+ *		     the whole kernel.
+ */
+
 #include <linux/sys.h>
 #include <linux/config.h>
 #include <linux/linkage.h>
@@ -57,6 +63,8 @@
 .globl SYMBOL_NAME(kgdb_registers)
 #endif
 
+#define curptr a2
+
 LENOSYS = 38
 
 /*
@@ -80,15 +88,15 @@
 #define	MAX_NOINT_IPL	0
 #endif /* machine compilation types */ 
 
-LD0		= 0x1C
-LORIG_D0	= 0x20
-LSR		= 0x28
-LFORMATVEC	= 0x2E
+LD0		= 0x20
+LORIG_D0	= 0x24
+LSR		= 0x2C
+LFORMATVEC	= 0x32
 
 /*
  * This defines the normal kernel pt-regs layout.
  *
- * regs are a2-a6 and d6-d7 preserved by C code
+ * regs a3-a6 and d6-d7 are preserved by C code
  * the kernel doesn't mess with usp unless it needs to
  */
 #ifndef CONFIG_KGDB
@@ -96,7 +104,7 @@
 	clrl	%sp@-;    /* stk_adj */	\
 	movel	%d0,%sp@-; /* orig d0 */	\
 	movel	%d0,%sp@-; /* d0 */	\
-	moveml	%d1-%d5/%a0-%a1,%sp@-
+	moveml	%d1-%d5/%a0-%a1/%curptr,%sp@-;
 #else
 /* Need to save the "missing" registers for kgdb...
  */
@@ -104,25 +112,30 @@
 	clrl	%sp@-;    /* stk_adj */				\
 	movel	%d0,%sp@-; /* orig d0 */			\
 	movel	%d0,%sp@-; /* d0 */				\
-	moveml	%d1-%d5/%a0-%a1,%sp@-;				\
+	moveml	%d1-%d5/%a0-%a1/%curptr,%sp@-;			\
 	moveml	%d6-%d7,SYMBOL_NAME(kgdb_registers)+GDBOFFA_D6;	\
-	moveml	%a2-%a6,SYMBOL_NAME(kgdb_registers)+GDBOFFA_A2
+	moveml	%a3-%a6,SYMBOL_NAME(kgdb_registers)+GDBOFFA_A3;
 #endif
 
 #define RESTORE_ALL			\
-	moveml	%sp@+,%a0-%a1/%d1-%d5;	\
+	moveml	%sp@+,%a0-%a1/%curptr/%d1-%d5;	\
 	movel	%sp@+,%d0;		\
 	addql	#4,%sp;	 /* orig d0 */	\
 	addl	%sp@+,%sp; /* stk adj */	\
 	rte
 
-#define SWITCH_STACK_SIZE (7*4+4)	/* includes return address */
+#define SWITCH_STACK_SIZE (6*4+4)	/* includes return address */
 
 #define SAVE_SWITCH_STACK \
-	moveml	%a2-%a6/%d6-%d7,%sp@-
+	moveml	%a3-%a6/%d6-%d7,%sp@-
 
 #define RESTORE_SWITCH_STACK \
-	moveml	%sp@+,%a2-%a6/%d6-%d7
+	moveml	%sp@+,%a3-%a6/%d6-%d7
+
+#define GET_CURRENT(tmp) \
+	movel	%sp,tmp; \
+	andw	&-8192,tmp; \
+	movel	tmp,%curptr;
 
 .globl SYMBOL_NAME(system_call), SYMBOL_NAME(buserr), SYMBOL_NAME(trap)
 .globl SYMBOL_NAME(resume), SYMBOL_NAME(ret_from_exception)
@@ -139,6 +152,7 @@
 					| signifies that the stack frame
 					| is NOT for syscall
 
+	GET_CURRENT(%d0)
 	movel	%sp,%sp@- 		| stack frame pointer argument
 	bsrl	SYMBOL_NAME(buserr_c)
 	addql	#4,%sp
@@ -150,6 +164,7 @@
 	movel	%d0,%sp@(LORIG_D0)	| a -1 in the ORIG_D0 field
 					| signifies that the stack frame
 					| is NOT for syscall
+	GET_CURRENT(%d0)
 	movel	%sp,%sp@- 		| stack frame pointer argument
 	bsrl	SYMBOL_NAME(trap_c)
 	addql	#4,%sp
@@ -190,6 +205,7 @@
 	SAVE_ALL
 	movel	%d0,%d2
 
+	GET_CURRENT(%d0)
 	| save top of frame
 	pea	%sp@
 	jbsr	SYMBOL_NAME(set_esp0)
@@ -197,8 +213,7 @@
 
 	cmpl	#NR_syscalls,%d2
 	jcc	badsys
-	movel	SYMBOL_NAME(current_set),%a0
-	btst	#5,%a0@(LTASK_FLAGS+3)	| PF_TRACESYS
+	btst	#5,%curptr@(LTASK_FLAGS+3)	| PF_TRACESYS
 	jne	do_trace
 	jbsr	@(SYMBOL_NAME(sys_call_table),%d2:l:4)@(0)
 	movel	%d0,%sp@(LD0)		| save the return value
@@ -208,24 +223,23 @@
 	bnes	2f			| if so, skip resched, signals
 	tstl	SYMBOL_NAME(need_resched)
 	jne	SYMBOL_NAME(reschedule)
-	movel	SYMBOL_NAME(current_set),%a0
-	cmpl	#SYMBOL_NAME(task),%a0	| task[0] cannot have signals
+	cmpl	#SYMBOL_NAME(task),%curptr	| task[0] cannot have signals
 	jeq	2f
-	bclr	#5,%a0@(LTASK_FLAGS+1)	| check for delayed trace
+	bclr	#5,%curptr@(LTASK_FLAGS+1)	| check for delayed trace
 	jne	do_delayed_trace
 5:
-	tstl	%a0@(LTASK_STATE)	| state
+	tstl	%curptr@(LTASK_STATE)	| state
 	jne	SYMBOL_NAME(reschedule)
-	tstl	%a0@(LTASK_COUNTER)	| counter
+	tstl	%curptr@(LTASK_COUNTER)	| counter
 	jeq	SYMBOL_NAME(reschedule)
 
-	movel	%a0@(LTASK_BLOCKED),%d0
+	movel	%curptr@(LTASK_BLOCKED),%d0
 	movel	%d0,%d1			| save blocked in d1 for sig handling
 	notl	%d0
-	btst	#4,%a0@(LTASK_FLAGS+3)	| PF_PTRACED
+	btst	#4,%curptr@(LTASK_FLAGS+3)	| PF_PTRACED
 	jeq	1f
 	moveq	#-1,%d0			| let the debugger see all signals
-1:	andl	%a0@(LTASK_SIGNAL),%d0
+1:	andl	%curptr@(LTASK_SIGNAL),%d0
 	jne	Lsignal_return
 2:	RESTORE_ALL
 
@@ -248,7 +262,6 @@
 	jbsr	SYMBOL_NAME(send_sig)
 	addql	#8,%sp
 	addql	#4,%sp
-	movel	SYMBOL_NAME(current_set),%a0
 	jra	5b
 
 /*
@@ -260,6 +273,7 @@
 	movel	%d0,%sp@(LORIG_D0)	| a -1 in the ORIG_D0 field
 					| signifies that the stack frame
 					| is NOT for syscall
+	GET_CURRENT(%d0)
 	addql	#1,SYMBOL_NAME(local_irq_count)
 					|  put exception # in d0
 	bfextu %sp@(LFORMATVEC){#4,#10},%d0
@@ -392,7 +406,7 @@
 3:
 
 	/* get pointer to tss struct (a1 contains new task) */
-	movel	%a1,SYMBOL_NAME(current_set)
+	movel	%a1,%curptr
 	addl	%d1,%a1
 
 	/* Skip address space switching if they are the same. */
--- arch/m68k/kernel/head.S.~1~	Mon Mar 24 16:47:36 1997
+++ arch/m68k/kernel/head.S	Thu May 15 15:43:36 1997
@@ -223,17 +223,9 @@
 	movel	%d0,%a0@		/* save cache mode for page tables */
 
 /*
- * raise interrupt level with MASTER bit set, copy isp to msp (if not 68060)
+ * raise interrupt level
  */
-#ifdef FROM_PL9
-	movew	#0x3700,%sr
-	is_060(1f)
-	movec	%isp,%d0
-	movel	%d0,%sp
-1:
-#else
 	movew	#0x2700,%sr
-#endif
 
 /*
    If running on an Atari, determine the I/O base of the
@@ -896,8 +888,10 @@
 
 /*
  * Setup initial stack pointer
+ * We need to get current loaded up with our first task...
  */
-	lea	SYMBOL_NAME(init_user_stack)+PAGESIZE,%sp
+	lea	SYMBOL_NAME(init_task_union),%a2
+	lea	8192(%a2),%sp
 
 /* jump to the kernel start */
 	putr()
--- arch/m68k/kernel/kgdb.c.~1~	Fri Apr 11 18:53:35 1997
+++ arch/m68k/kernel/kgdb.c	Thu May 15 13:18:06 1997
@@ -734,18 +734,18 @@
 
 /* offsets in struct frame */
 #define FRAMEOFF_D1		"0"		/* d1..d5 */
-#define FRAMEOFF_A0		"5*4"	/* a0..a1 */
-#define FRAMEOFF_D0		"7*4"
-#define FRAMEOFF_SR		"10*4"
-#define FRAMEOFF_PC		"10*4+2"
-#define FRAMEOFF_VECTOR	"11*4+2"
+#define FRAMEOFF_A0		"5*4"	/* a0..a2 */
+#define FRAMEOFF_D0		"8*4"
+#define FRAMEOFF_SR		"11*4"
+#define FRAMEOFF_PC		"11*4+2"
+#define FRAMEOFF_VECTOR	"12*4+2"
 
 /* offsets in struct gdb_regs */
 #define GDBOFF_D0		"0"
 #define GDBOFF_D1		"1*4"
 #define GDBOFF_D6		"6*4"
 #define GDBOFF_A0		"8*4"
-#define GDBOFF_A2		"10*4"
+#define GDBOFF_A3		"11*4"
 #define GDBOFF_A7		"15*4"
 #define GDBOFF_VECTOR	"16*4"
 #define GDBOFF_SR		"16*4+2"
@@ -768,8 +768,8 @@
   "		movel	%a0@("FRAMEOFF_D0"),%a1@("GDBOFF_D0")\n"
   "		moveml	%a0@("FRAMEOFF_D1"),%d1-%d5\n"
   "		moveml	%d1-%d5,%a1@("GDBOFF_D1")\n"
-  "		moveml	%a0@("FRAMEOFF_A0"),%d0-%d1\n"
-  "		moveml	%d0-%d1,%a1@("GDBOFF_A0")\n"
+  "		moveml	%a0@("FRAMEOFF_A0"),%d0-%d2\n"
+  "		moveml	%d0-%d2,%a1@("GDBOFF_A0")\n"
   /* copy sr and pc */
   "		movel	%a0@("FRAMEOFF_PC"),%a1@("GDBOFF_PC")\n"
   "		movew	%a0@("FRAMEOFF_SR"),%a1@("GDBOFF_SR")\n"
--- arch/m68k/kernel/process.c.~2~	Wed May 14 17:44:30 1997
+++ arch/m68k/kernel/process.c	Thu May 15 19:21:18 1997
@@ -30,6 +30,7 @@
 #include <asm/traps.h>
 #include <asm/machdep.h>
 #include <asm/setup.h>
+#include <asm/pgtable.h>
 
 /*
  * Initial task structure. Make this a per-architecture thing,
@@ -37,15 +38,15 @@
  * alignment requirements and potentially different initial
  * setup.
  */
-static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
-unsigned long init_user_stack[1024] = { STACK_MAGIC, };
 static struct vm_area_struct init_mmap = INIT_MMAP;
 static struct fs_struct init_fs = INIT_FS;
 static struct files_struct init_files = INIT_FILES;
 static struct signal_struct init_signals = INIT_SIGNALS;
-
 struct mm_struct init_mm = INIT_MM;
-struct task_struct init_task = INIT_TASK;
+
+union task_union init_task_union
+	__attribute__((section("init_task"), aligned(2*PAGE_SIZE)))
+	= { task: INIT_TASK };
 
 asmlinkage void ret_from_exception(void);
 
@@ -71,6 +72,7 @@
 #else /* portable version */
 			__asm__("stop #0x2000" : : : "cc");
 #endif /* machine compilation types */ 
+		run_task_queue(&tq_scheduler);
 		schedule();
 	}
 	ret = 0;
@@ -101,8 +103,8 @@
 	printk("\n");
 	printk("Format %02x  Vector: %04x  PC: %08lx  Status: %04x\n",
 	       regs->format, regs->vector, regs->pc, regs->sr);
-	printk("ORIG_D0: %08lx  D0: %08lx  A1: %08lx\n",
-	       regs->orig_d0, regs->d0, regs->a1);
+	printk("ORIG_D0: %08lx  D0: %08lx  A2: %08lx  A1: %08lx\n",
+	       regs->orig_d0, regs->d0, regs->a2, regs->a1);
 	printk("A0: %08lx  D5: %08lx  D4: %08lx\n",
 	       regs->a0, regs->d5, regs->d4);
 	printk("D3: %08lx  D2: %08lx  D1: %08lx\n",
@@ -169,8 +171,8 @@
 	struct switch_stack * childstack, *stack;
 	unsigned long stack_offset, *retp;
 
-	stack_offset = PAGE_SIZE - sizeof(struct pt_regs);
-	childregs = (struct pt_regs *) (p->kernel_stack_page + stack_offset);
+	stack_offset = 2*PAGE_SIZE - sizeof(struct pt_regs);
+	childregs = (struct pt_regs *) ((unsigned long) p + stack_offset);
 
 	*childregs = *regs;
 	childregs->d0 = 0;
@@ -256,7 +258,7 @@
 	dump->regs.d7 = sw->d7;
 	dump->regs.a0 = regs->a0;
 	dump->regs.a1 = regs->a1;
-	dump->regs.a2 = sw->a2;
+	dump->regs.a2 = regs->a2;
 	dump->regs.a3 = sw->a3;
 	dump->regs.a4 = sw->a4;
 	dump->regs.a5 = sw->a5;
--- arch/m68k/kernel/ptrace.c.~2~	Wed May 14 17:44:53 1997
+++ arch/m68k/kernel/ptrace.c	Thu May 15 15:21:51 1997
@@ -47,7 +47,7 @@
 static int regoff[] = {
 	PT_REG(d1), PT_REG(d2), PT_REG(d3), PT_REG(d4),
 	PT_REG(d5), SW_REG(d6), SW_REG(d7), PT_REG(a0),
-	PT_REG(a1), SW_REG(a2), SW_REG(a3), SW_REG(a4),
+	PT_REG(a1), PT_REG(a2), SW_REG(a3), SW_REG(a4),
 	SW_REG(a5), SW_REG(a6), PT_REG(d0), -1,
 	PT_REG(orig_d0), PT_REG(sr), PT_REG(pc),
 };
@@ -104,7 +104,7 @@
 repeat:
 	pgdir = pgd_offset(vma->vm_mm, addr);
 	if (pgd_none(*pgdir)) {
-		do_no_page(tsk, vma, addr, 0);
+		handle_mm_fault(vma, addr, 0);
 		goto repeat;
 	}
 	if (pgd_bad(*pgdir)) {
@@ -114,7 +114,7 @@
 	}
 	pgmiddle = pmd_offset(pgdir,addr);
 	if (pmd_none(*pgmiddle)) {
-		do_no_page(tsk, vma, addr, 0);
+		handle_mm_fault(vma, addr, 0);
 		goto repeat;
 	}
 	if (pmd_bad(*pgmiddle)) {
@@ -125,7 +125,7 @@
 	}
 	pgtable = pte_offset(pgmiddle, addr);
 	if (!pte_present(*pgtable)) {
-		do_no_page(tsk, vma, addr, 0);
+		handle_mm_fault(vma, addr, 0);
 		goto repeat;
 	}
 	page = pte_page(*pgtable);
@@ -156,7 +156,7 @@
 repeat:
 	pgdir = pgd_offset(vma->vm_mm, addr);
 	if (!pgd_present(*pgdir)) {
-		do_no_page(tsk, vma, addr, 1);
+		handle_mm_fault(vma, addr, 1);
 		goto repeat;
 	}
 	if (pgd_bad(*pgdir)) {
@@ -166,7 +166,7 @@
 	}
 	pgmiddle = pmd_offset(pgdir,addr);
 	if (pmd_none(*pgmiddle)) {
-		do_no_page(tsk, vma, addr, 1);
+		handle_mm_fault(vma, addr, 1);
 		goto repeat;
 	}
 	if (pmd_bad(*pgmiddle)) {
@@ -177,12 +177,12 @@
 	}
 	pgtable = pte_offset(pgmiddle, addr);
 	if (!pte_present(*pgtable)) {
-		do_no_page(tsk, vma, addr, 1);
+		handle_mm_fault(vma, addr, 1);
 		goto repeat;
 	}
 	page = pte_page(*pgtable);
 	if (!pte_write(*pgtable)) {
-		do_wp_page(tsk, vma, addr, 2);
+		handle_mm_fault(vma, addr, 1);
 		goto repeat;
 	}
 /* this is a hack for non-kernel-mapped video buffers and similar */
--- arch/m68k/kernel/traps.c.~2~	Wed May 14 17:47:27 1997
+++ arch/m68k/kernel/traps.c	Thu May 15 19:20:18 1997
@@ -933,16 +933,15 @@
 #endif
 	console_verbose();
 	printk("%s: %08x\n",str,nr);
-	printk("PC: [<%08lx>]\nSR: %04x  SP: %p\n", fp->pc, fp->sr, fp);
+	printk("PC: [<%08lx>]\nSR: %04x  SP: %p  a2: %08lx\n",
+	       fp->pc, fp->sr, fp, fp->a2);
 	printk("d0: %08lx    d1: %08lx    d2: %08lx    d3: %08lx\n",
 	       fp->d0, fp->d1, fp->d2, fp->d3);
 	printk("d4: %08lx    d5: %08lx    a0: %08lx    a1: %08lx\n",
 	       fp->d4, fp->d5, fp->a0, fp->a1);
 
-	if (STACK_MAGIC != *(unsigned long *)current->kernel_stack_page)
-		printk("Corrupted stack page\n");
 	printk("Process %s (pid: %d, stackpage=%08lx)\n",
-		current->comm, current->pid, current->kernel_stack_page);
+		current->comm, current->pid, PAGE_SIZE+(unsigned long)current);
 #ifdef CONFIG_KGDB
 	}
 #endif
--- arch/m68k/lib/semaphore.S.~1~	Mon Feb 17 19:08:32 1997
+++ arch/m68k/lib/semaphore.S	Wed May 14 19:06:26 1997
@@ -19,8 +19,7 @@
 	movel %a1,-(%sp)
 	jbsr SYMBOL_NAME(__down)
 	movel (%sp)+,%a1
-	movel (%sp)+,%d0
-	movel (%sp)+,%d1
+	moveml (%sp)+,%a0/%d0/%d1
 	rts
 
 ENTRY(__down_failed_interruptible)
@@ -30,6 +29,7 @@
 	jbsr SYMBOL_NAME(__down_interruptible)
 	movel (%sp)+,%a1
 	movel (%sp)+,%d1
+	movel (%sp)+,%a0
 	rts
 
 ENTRY(__up_wakeup)
@@ -37,6 +37,5 @@
 	movel %a1,-(%sp)
 	jbsr SYMBOL_NAME(__up)
 	movel (%sp)+,%a1
-	movel (%sp)+,%d0
-	movel (%sp)+,%d1
+	moveml (%sp)+,%a0/%d0/%d1
 	rts
--- arch/m68k/mm/fault.c.~1~	Wed Jan 15 17:39:51 1997
+++ arch/m68k/mm/fault.c	Wed May 14 21:50:33 1997
@@ -32,14 +32,10 @@
 asmlinkage int do_page_fault(struct pt_regs *regs, unsigned long address,
 			      unsigned long error_code)
 {
-	void (*handler)(struct task_struct *,
-			struct vm_area_struct *,
-			unsigned long,
-			int);
 	struct task_struct *tsk = current;
 	struct mm_struct *mm = tsk->mm;
 	struct vm_area_struct * vma;
-	unsigned long fixup, fault_pc;
+	unsigned long fixup;
 	int write;
 
 #ifdef DEBUG
@@ -73,10 +69,8 @@
  */
 good_area:
 	write = 0;
-	handler = do_no_page;
 	switch (error_code & 3) {
 		default:	/* 3: write, present */
-			handler = do_wp_page;
 			/* fall through */
 		case 2:		/* write, not present */
 			if (!(vma->vm_flags & VM_WRITE))
@@ -89,7 +83,7 @@
 			if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 				goto bad_area;
 	}
-	handler(tsk, vma, address, write);
+	handle_mm_fault(vma, address, write);
 	up(&mm->mmap_sem);
 
 	/* There seems to be a missing invalidate somewhere in do_no_page.
@@ -108,10 +102,10 @@
 	up(&mm->mmap_sem);
 
 	/* Are we prepared to handle this fault?  */
-	fault_pc = regs->pc;
-	if ((fixup = search_exception_table(fault_pc)) != 0) {
+	if ((fixup = search_exception_table(regs->pc)) != 0) {
 		struct pt_regs *tregs;
-		printk(KERN_DEBUG "Exception at [<%lx>] (%lx)\n", fault_pc, fixup);
+		printk(KERN_DEBUG "%s: Exception at [<%lx>] (%lx)\n",
+		       current->comm, regs->pc, fixup);
 		/* Create a new four word stack frame, discarding the old
 		   one.  */
 		regs->stkadj = frame_extra_sizes[regs->format];
--- arch/m68k/mm/memory.c.~1~	Mon Feb 17 17:33:14 1997
+++ arch/m68k/mm/memory.c	Wed May 14 22:01:38 1997
@@ -280,6 +280,8 @@
     return( (vaddr & mask) == (base & mask) );
 }
 
+static unsigned long mm_vtop_fallback (unsigned long);
+
 /*
  * The following two routines map from a physical address to a kernel
  * virtual address and vice versa.
@@ -301,7 +303,13 @@
 			offset += m68k_memory[i].size;
 		i++;
 	}while (i < m68k_num_memory);
+	return mm_vtop_fallback(vaddr);
+}
 
+/* Separate function to make the common case faster (it needs to save
+   fewer registers) */
+static unsigned long mm_vtop_fallback (unsigned long vaddr)
+{
 	/* not in one of the memory chunks; test for applying transparent
 	 * translation */
 
--- arch/m68k/vmlinux.lds.~1~	Fri Apr 18 20:25:48 1997
+++ arch/m68k/vmlinux.lds	Thu May 15 09:12:03 1997
@@ -9,6 +9,7 @@
   .text : {
 	*(.text)
 	*(.fixup)
+	*(.text.lock)		/* out-of-line lock text */
 	*(.gnu.warning)
 	} = 0x4e75
   .rodata : { *(.rodata) }
@@ -29,6 +30,9 @@
 	*(.data)
 	CONSTRUCTORS
 	}
+
+  . = ALIGN(8192);
+  init_task : { *(init_task) }	/* The initial task and kernel stack */
 
   _edata = .;			/* End of data section */
 
--- drivers/char/m68kserial.c.~2~	Sat Apr 26 01:41:54 1997
+++ drivers/char/m68kserial.c	Wed May 14 18:05:22 1997
@@ -284,7 +284,7 @@
 	if (!tty)
 		return;
 
-	if (clear_bit(RS_EVENT_WRITE_WAKEUP, &info->event)) {
+	if (test_and_clear_bit(RS_EVENT_WRITE_WAKEUP, &info->event)) {
 		if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
 		    tty->ldisc.write_wakeup)
 			(tty->ldisc.write_wakeup)(tty);
--- fs/proc/array.c.~3~	Wed May 14 18:22:55 1997
+++ fs/proc/array.c	Wed May 14 22:58:28 1997
@@ -471,10 +471,9 @@
 	    unsigned long stack_page;
 	    int count = 0;
 	    extern int sys_pause (void);
+	    extern void tqueue_bh (void);
 
-	    stack_page = p->kernel_stack_page;
-	    if (!stack_page)
-		    return 0;
+	    stack_page = PAGE_SIZE + (unsigned long)p; /* upper page of the 2-page task area */
 	    fp = ((struct switch_stack *)p->tss.ksp)->a6;
 	    do {
 		    if (fp < stack_page || fp >= 4088+stack_page)
@@ -482,7 +481,7 @@
 		    pc = ((unsigned long *)fp)[1];
 		/* FIXME: This depends on the order of these functions. */
 		    if ((pc < (unsigned long) __down
-		         || pc >= (unsigned long) add_timer)
+		         || pc >= (unsigned long) tqueue_bh)
 		        && (pc < (unsigned long) schedule
 			    || pc >= (unsigned long) sys_pause))
 		      return pc;
--- include/asm-m68k/atomic.h.~1~	Sat Apr 26 00:12:26 1997
+++ include/asm-m68k/atomic.h	Sat Apr 26 01:59:00 1997
@@ -11,7 +11,7 @@
  */
 
 typedef struct { int counter; } atomic_t;
-#define ATOMIC_INIT	{ 0 }
+#define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)		((v)->counter)
 #define atomic_set(v, i)	(((v)->counter) = i)
--- include/asm-m68k/bitops.h.~1~	Sat Sep  7 12:34:30 1996
+++ include/asm-m68k/bitops.h	Wed May 14 18:55:37 1997
@@ -14,7 +14,25 @@
  * They use the standard big-endian m680x0 bit ordering.
  */
 
-extern __inline__ int set_bit(int nr,void * vaddr)
+extern __inline__ void set_bit(int nr,void * vaddr)
+{
+	__asm__ __volatile__ ("bfset %1@{%0:#1}"
+		: : "d" (nr^31), "a" (vaddr));
+}
+
+extern __inline__ void clear_bit(int nr, void * vaddr)
+{
+	__asm__ __volatile__ ("bfclr %1@{%0:#1}"
+		: : "d" (nr^31), "a" (vaddr));
+}
+
+extern __inline__ void change_bit(int nr, void * vaddr)
+{
+	__asm__ __volatile__ ("bfchg %1@{%0:#1}"
+		: : "d" (nr^31), "a" (vaddr));
+}
+
+extern __inline__ int test_and_set_bit(int nr, void * vaddr)
 {
 	char retval;
 
@@ -24,7 +42,7 @@
 	return retval;
 }
 
-extern __inline__ int clear_bit(int nr, void * vaddr)
+extern __inline__ int test_and_clear_bit(int nr, void * vaddr)
 {
 	char retval;
 
@@ -34,7 +52,7 @@
 	return retval;
 }
 
-extern __inline__ int change_bit(int nr, void * vaddr)
+extern __inline__ int test_and_change_bit(int nr, void * vaddr)
 {
 	char retval;
 
--- include/asm-m68k/current.h.~1~	Wed Jan 15 17:47:11 1997
+++ include/asm-m68k/current.h	Thu May 15 13:14:29 1997
@@ -1,12 +1,6 @@
 #ifndef _M68K_CURRENT_H
 #define _M68K_CURRENT_H
 
-/* Some architectures may want to do something "clever" here since
- * this is the most frequently accessed piece of data in the entire
- * kernel.  For an example, see the Sparc implementation where an
- * entire register is hard locked to contain the value of current.
- */
-extern struct task_struct *current_set[NR_CPUS];
-#define current (current_set[smp_processor_id()])	/* Current on this processor */
+register struct task_struct *current __asm__("%a2");
 
 #endif /* !(_M68K_CURRENT_H) */
--- include/asm-m68k/elf.h.~1~	Sun Oct 27 01:09:24 1996
+++ include/asm-m68k/elf.h	Thu May 15 15:28:50 1997
@@ -44,6 +44,7 @@
 	pr_reg[4] = regs->d5;						\
 	pr_reg[7] = regs->a0;						\
 	pr_reg[8] = regs->a1;						\
+	pr_reg[9] = regs->a2;						\
 	pr_reg[14] = regs->d0;						\
 	pr_reg[15] = rdusp();						\
 	pr_reg[16] = regs->orig_d0;					\
@@ -54,7 +55,6 @@
 	  struct switch_stack *sw = ((struct switch_stack *)regs) - 1;	\
 	  pr_reg[5] = sw->d6;						\
 	  pr_reg[6] = sw->d7;						\
-	  pr_reg[9] = sw->a2;						\
 	  pr_reg[10] = sw->a3;						\
 	  pr_reg[11] = sw->a4;						\
 	  pr_reg[12] = sw->a5;						\
--- include/asm-m68k/kgdb.h.~1~	Mon Feb 17 17:38:18 1997
+++ include/asm-m68k/kgdb.h	Thu May 15 13:17:00 1997
@@ -25,7 +25,7 @@
 #define GDBREG_FPIAR 28
 
 #define GDBOFFA_D6	(6*4)
-#define GDBOFFA_A2	(10*4)
+#define GDBOFFA_A3	(11*4)
 
 #define NUMREGSBYTES	180
 
--- include/asm-m68k/processor.h.~2~	Wed May 14 21:06:57 1997
+++ include/asm-m68k/processor.h	Wed May 14 21:07:25 1997
@@ -47,7 +47,7 @@
 #define INIT_MMAP { &init_mm, 0, 0x40000000, __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED), VM_READ | VM_WRITE | VM_EXEC, NULL, &init_mm.mmap }
 
 #define INIT_TSS  { \
-	sizeof(init_kernel_stack) + (long) init_kernel_stack, 0, \
+	sizeof(init_stack) + (unsigned long) init_stack, 0, \
 	PS_S, KERNEL_DS, \
 	{0, 0}, 0, {0,}, {0, 0, 0}, {0,}, \
 }
@@ -93,9 +93,11 @@
 }
 
 /* Allocation and freeing of basic task resources. */
-#define alloc_task_struct()	kmalloc(sizeof(struct task_struct), GFP_KERNEL)
-#define alloc_kernel_stack(p)	__get_free_page(GFP_KERNEL)
-#define free_task_struct(p)	kfree(p)
-#define free_kernel_stack(page) free_page((page))
+#define alloc_task_struct() \
+	((struct task_struct *) __get_free_pages(GFP_KERNEL,1,0))
+#define free_task_struct(p)	free_pages((unsigned long)(p),1)
+
+#define init_task	(init_task_union.task)
+#define init_stack	(init_task_union.stack)
 
 #endif
--- include/asm-m68k/ptrace.h.~1~	Wed Apr 24 18:17:27 1996
+++ include/asm-m68k/ptrace.h	Thu May 15 15:24:00 1997
@@ -34,6 +34,7 @@
   long     d5;
   long     a0;
   long     a1;
+  long     a2;
   long     d0;
   long     orig_d0;
   long     stkadj;
@@ -50,7 +51,6 @@
 struct switch_stack {
 	unsigned long  d6;
 	unsigned long  d7;
-	unsigned long  a2;
 	unsigned long  a3;
 	unsigned long  a4;
 	unsigned long  a5;
--- include/asm-m68k/semaphore.h.~3~	Sat Apr 26 01:59:35 1997
+++ include/asm-m68k/semaphore.h	Thu May 15 08:57:33 1997
@@ -73,41 +73,26 @@
  * "down_failed" is a special asm handler that calls the C
  * routine that actually waits. See arch/m68k/lib/semaphore.S
  */
-extern inline void down(struct semaphore * sem)
+extern inline void do_down(struct semaphore * sem, void (*failed)(void))
 {
 	register struct semaphore *sem1 __asm__ ("%a1") = sem;
 	__asm__ __volatile__(
 		"| atomic down operation\n\t"
-		"lea %%pc@(1f),%%a0\n\t"
 		"subql #1,%0@\n\t"
-		"jmi " SYMBOL_NAME_STR(__down_failed) "\n"
-		"1:"
+		"jmi 2f\n"
+		"1:\n"
+		".section .text.lock,\"ax\"\n"
+		".even\n"
+		"2:\tpea 1b\n\t"
+		"jbra %1\n"
+		".previous"
 		: /* no outputs */
-		: "a" (sem1)
-		: "%a0", "memory");
+		: "a" (sem1), "m" (*(unsigned char *)failed)
+		: "memory");
 }
 
-/*
- * This version waits in interruptible state so that the waiting
- * process can be killed.  The down_failed_interruptible routine
- * returns negative for signalled and zero for semaphore acquired.
- */
-extern inline int down_interruptible(struct semaphore * sem)
-{
-	register int ret __asm__ ("%d0");
-	register struct semaphore *sem1 __asm__ ("%a1") = sem;
-	__asm__ __volatile__(
-		"| atomic interruptible down operation\n\t"
-		"lea %%pc@(1f),%%a0\n\t"
-		"subql #1,%1@\n\t"
-		"jmi " SYMBOL_NAME_STR(__down_failed_interruptible) "\n\t"
-		"clrl %0\n"
-		"1:"
-		: "=d" (ret)
-		: "a" (sem1)
-		: "%d0", "%a0", "memory");
-	return ret;
-}
+#define down(sem) do_down((sem),__down_failed)
+#define down_interruptible(sem) do_down((sem),__down_failed_interruptible)
 
 /*
  * Note! This is subtle. We jump to wake people up only if
@@ -120,13 +105,17 @@
 	register struct semaphore *sem1 __asm__ ("%a1") = sem;
 	__asm__ __volatile__(
 		"| atomic up operation\n\t"
-		"lea %%pc@(1f),%%a0\n\t"
-		"addql #1,%0\n\t"
-		"jle " SYMBOL_NAME_STR(__up_wakeup) "\n"
-		"1:"
+		"addql #1,%0@\n\t"
+		"jle 2f\n"
+		"1:\n"
+		".section .text.lock,\"ax\"\n"
+		".even\n"
+		"2:\tpea 1b\n\t"
+		"jbra %1\n"
+		".previous"
 		: /* no outputs */
-		: "m" (sem->count), "a" (sem1)
-		: "%a0", "memory");
+		: "a" (sem1), "m" (*(unsigned char *)__up_wakeup)
+		: "memory");
 }
 
 #endif
--- include/asm-m68k/softirq.h.~2~	Sat Apr 26 00:53:21 1997
+++ include/asm-m68k/softirq.h	Sat Apr 26 02:00:06 1997
@@ -38,6 +38,12 @@
 		bh_mask |= 1 << nr;
 }
 
+extern inline void remove_bh(int nr)
+{
+	bh_base[nr] = NULL;
+	bh_mask &= ~(1 << nr);
+}
+
 extern int __m68k_bh_counter;
 
 extern inline void start_bh_atomic(void)
--- include/asm-m68k/spinlock.h.~1~	Mon Apr 14 19:32:32 1997
+++ include/asm-m68k/spinlock.h	Sat Apr 26 02:03:07 1997
@@ -5,12 +5,16 @@
  * We don't do SMP on the m68k .... at least not yet.
  */
 
+/*
+ * Your basic spinlocks, allowing only a single CPU anywhere
+ */
 typedef struct { } spinlock_t;
 #define SPIN_LOCK_UNLOCKED { }
 
 #define spin_lock_init(lock)	do { } while(0)
 #define spin_lock(lock)		do { } while(0)
 #define spin_trylock(lock)	do { } while(0)
+#define spin_unlock_wait(lock)	do { } while(0)
 #define spin_unlock(lock)	do { } while(0)
 #define spin_lock_irq(lock)	cli()
 #define spin_unlock_irq(lock)	sti()
@@ -18,6 +22,37 @@
 #define spin_lock_irqsave(lock, flags) \
 	do { save_flags(flags); cli(); } while (0)
 #define spin_unlock_irqrestore(lock, flags) \
+	restore_flags(flags)
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+typedef struct { } rwlock_t;
+#define RW_LOCK_UNLOCKED { }
+
+#define read_lock(lock)		do { } while(0)
+#define read_unlock(lock)	do { } while(0)
+#define write_lock(lock)	do { } while(0)
+#define write_unlock(lock)	do { } while(0)
+#define read_lock_irq(lock)	cli()
+#define read_unlock_irq(lock)	sti()
+#define write_lock_irq(lock)	cli()
+#define write_unlock_irq(lock)	sti()
+
+#define read_lock_irqsave(lock, flags)	\
+	do { save_flags(flags); cli(); } while (0)
+#define read_unlock_irqrestore(lock, flags) \
+	restore_flags(flags)
+#define write_lock_irqsave(lock, flags)	\
+	do { save_flags(flags); cli(); } while (0)
+#define write_unlock_irqrestore(lock, flags) \
 	restore_flags(flags)
 
 #endif
--- include/asm-m68k/unistd.h.~1~	Wed Apr  9 20:52:15 1997
+++ include/asm-m68k/unistd.h	Thu May 15 19:17:48 1997
@@ -322,9 +322,11 @@
 	set_fs (KERNEL_DS);
 
 	__asm__ __volatile__
-	  ("trap #0\n\t"		/* Linux/m68k system call */
+	  ("clrl %%d2\n\t"
+	   "trap #0\n\t"		/* Linux/m68k system call */
 	   "tstl %0\n\t"		/* child or parent */
 	   "jne 1f\n\t"			/* parent - jump */
+	   "lea %%sp@(-8192),%6\n\t"	/* reload current */
 	   "movel %3,%%sp@-\n\t"	/* push argument */
 	   "jsr %4@\n\t"		/* call fn */
 	   "movel %0,%%d1\n\t"		/* pass exit value */
@@ -333,8 +335,8 @@
 	   "1:"
 	   : "=d" (retval)
 	   : "0" (__NR_clone), "i" (__NR_exit),
-	     "r" (arg), "a" (fn), "d" (clone_arg)
-	   : "d0");
+	     "r" (arg), "a" (fn), "d" (clone_arg), "r" (current)
+	   : "d0", "d2");
 
 	set_fs (fs);
 	return retval;
