/*
 *	LIFE: generate.c
 *
 *	Perform one life generation on the screen
 */

/* Current size: 3/4 of screen area for about a factor of two speedup */
/*
 * These constants are expanded inside the inline assembly in generate()
 * (as immediates and as address displacements), so they are kept as plain
 * integer literals.  VSB is not computed from VSW: if one is changed the
 * other must be changed by hand.
 */
#define NROW 300		/* number of screen pixel rows to do */
#define NCOL 60			/* number of screen *byte* columns to do; the
				 * column loop in generate() relies on this
				 * being even */
#define VSW 40			/* how many words per line down the screen */
#define VSB 80			/* Vertical stride in bytes, == VSW*2 */

/* from tables.c */
/*
 * lifetab: byte table indexed by an 18-bit field of the workspace (see the
 * "newbyte" sequences in generate(), which mask the index with 0777777).
 * Each lookup supplies four bits of the result byte there.
 */
extern char *lifetab;		/* The Big One */
/*
 * spread: table of longs indexed by a 10-bit value; generate() indexes it
 * with a byte offset of (value << 2), masked with 01777<<2.
 */
extern long spread[];		/* spread bits out to every 3 */

/*
 * Column-top pointers shared between the C side and the assembly in
 * generate(), which reads and updates them through A4.
 * Although both are declared int *, ncolp is used as a byte address:
 * generate() advances it by 1 per column, while ocolp advances by 2
 * (one word) per pair of columns.
 */
int *ncolp, *ocolp;	/* Pointers to top of current column of old and new scr.
			 * ocolp points to the word containing byte at col top,
			 * ncolp points to the exact byte at the column top
			 */
/*
 * generate - perform one life generation, reading oldscr and writing newscr.
 *
 *	oldscr	base of the source screen (the current generation)
 *	newscr	base of the destination screen (the next generation)
 *
 * The area processed is NCOL byte columns wide and NROW rows high, with
 * successive rows VSB bytes apart.  Work proceeds one byte column at a
 * time, top to bottom:
 *	- column 0 via column() with fcget10,
 *	- then (NCOL-2)/2 pairs of odd/even columns via ocolumn()/ecolumn(),
 *	- then the last column via column() with lcget10.
 * For each row a "get10" routine fetches ten bits from the old screen,
 * spread[] expands them, and the result is merged into a rolling
 * workspace (D6) from which two lifetab lookups produce one output byte.
 *
 * The body is entirely Megamax inline assembly; no value is returned
 * explicitly.  A5/A6 are saved and restored around the work, and A4 is
 * saved/restored inside each column routine.
 */
generate(oldscr, newscr)
int *oldscr, *newscr;	/* source and destination screen base pointers */
{
/* The code below is written with dbra; map it onto the dbf mnemonic
 * (presumably the spelling this assembler accepts - confirm). */
#define dbra dbf
asm {
	
	; A0 - scratch
	; A1 - lifetab		2 ind
	; A2 - osp		1 ind, 1 inc
	; A3 - nsp		2 ind, 1 inc
	; A4 - usually Megamax's for access to static variable.
	;	within column, address of the spread[] array
	; A5 - pointer to store() subr for quick access with jsr (A5)
	; A6 - Get10	subr pointer to get the next 10 bits off the screen
	; D7 - row loop counter used for dbra
	; D6 - work		3 read, 1 write
	; D5 - constant 06666666666	bottom row of work	1 use
	; D4 - constant 0777777		18 bit mask		1 use
	; D3 - constant 01777<<2	10 bit mask		1 use
	; D2 - constant 0xFF00		upper byte of word	1 every odd row
	; D1 - column loop counter

	/* In addition to the list above: D0 carries the get10 result and
	 * the byte being built by store; D1 is also used as scratch by
	 * the newbyte sequences, which is why the column counter is kept
	 * on the stack while a column is being processed. */

	; Copy chosen external variables into registers
	movea.l	lifetab(A4),A1

	; Some useful constants
	/* D5 keeps the upper two bits of every 3-bit group of the
	 * workspace, i.e. it clears the low bit of each group after the
	 * workspace has been shifted left by one. */
	move.l	#06666666666,D5
	move.l	#0777777,D4
	/* D3 is the 10-bit mask already shifted up by 2, so a masked
	 * get10 result is directly a byte offset into spread[]. */
	move.l	#01777<<2,D3
	move.l	#0xFF00,D2

	/* Initialise top of column pointers */
	movea.l	oldscr(A6),A0
	move.l	A0,ocolp(A4)

	movea.l	newscr(A6),A0
	move.l	A0,ncolp(A4)

	/* that was our last use of A6. Use it as a temp */
	/* A5 and A6 are saved here and restored just before leaving. */
	movem.l	A5/A6,-(A7)

	/* Do first column */
	lea	fcget10(PC),A6	; Get10 = fcget10
	lea	store(PC),A5	; for quick access to store()
	bsr	column			; column()

	/* Do columns 1..NCOL-2 */
	/* dbra runs count+1 times, so the loop below makes NCOL/2-1
	 * passes; each pass does one odd and one even column, covering
	 * NCOL-2 columns in total. */
				; for (col=1; col < NCOL-2; ) {
	move.w	#(NCOL)/2-2,D1	; NCOL-2 iterations (2 per loop)
for2:
		move.w	D1,-(A7)	; push col so we can use D1

		/* Odd column: same old-screen word as the previous
		 * column, next destination byte. */
		addq.l	#1,ncolp(A4)
		lea	ocget10(PC),A6	; Get10 = ocget10;
		bsr	ocolumn		; do easy odd column

		addq.l	#2,ocolp(A4)	; move on to next column of integers
		addq.l	#1,ncolp(A4)

		lea	ecget10(PC),A6	; Get10 = ecget10;
		bsr	ecolumn		; do easy even column

		move.w	(A7)+,D1	; restore col
dbra	D1,for2		; }

	/* Do last (right hand) column - relies on NCOL being even */
	/* so that we can assume the last one is an odd column. */
	addq.l	#1,ncolp(A4)
	lea	lcget10(PC),A6	; Get10 = lcget10;
	bsr	column		; column()

	movem.l	(A7)+,A5/A6	; restore megamax's registers
	/* NOTE(review): jmp to a label here, whereas the rest of the
	 * code uses bsr / PC-relative lea - confirm how this assembler
	 * encodes it. */
	jmp	ret		; exit generate() function

/* Do a general column, using function pointers to select Store and Get10 fns */
/* Used for first and last columns */
/*
 * On entry: A6 = get10 routine, A5 = store routine, ocolp/ncolp = tops of
 * the old and new columns.  Uses A2, A3, D0, D1, D6, D7; A4 is borrowed
 * for spread[] and restored before returning.
 *
 * NOTE: the trailing comments on the two movea.l lines below have the
 * names swapped.  A2 is loaded from ocolp and is the old-screen pointer
 * (osp); A3 is loaded from ncolp and is the new-screen pointer (nsp).
 */
column:
	movea.l	ocolp(A4),A2		; nsp = &nscr[col>>1];
	movea.l	ncolp(A4),A3		; osp = &oscr[(col>>1)];

	/* Init workspace for top row */

	move.l	A4,-(A7)	; save Megamax's A4 so we can use it for spread
	lea	spread(A4),A4	; Hmm.  Poor A4.

	jsr	(A6)		; work = spread[get10()];
	move.l	0(A4,D0.w),D6

	/* dbra with a count of NROW-2 runs the body NROW-1 times.  Each
	 * pass reads the next old-screen row into the workspace and then
	 * stores the result byte for the row above it. */
	move.w	#NROW-2,D7	; for NROW-1 iterations
for1:
		lea	VSB(A2),A2	; osp += VS; faster than adda.l #VSB,A2	

		/* roll workspace down and refill bottom row */
		asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
		and.l	D5,D6
		jsr	(A6)		;	| spread[get10()];
		or.l	0(A4,D0.w),D6	; .w - table is only 4k long

		jsr	(A5)		; store();

		lea	VSB(A3),A3	; nsp += VS;
	dbra	D7,for1

	/* Do bottom of column, the byte on the bottom row */
	/* Roll workspace down. Degenerate case - no refill */

	asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
	and.l	D5,D6

	jsr	(A5)		; store()

	/* Claw back A4 so that we can access external variables again */
	move.l	(A7)+,A4

	rts			; end of column()

/* Special-case even column, not used for first column of screen */
/*
 * Same algorithm as column(), but with the ecget10 and store sequences
 * expanded inline inside the row loop; only the top-row fetch and the
 * bottom-row store are still subroutine calls.
 *
 * NOTE: the trailing comments on the two movea.l lines below have the
 * names swapped.  A2 (from ocolp) is the old-screen pointer, A3 (from
 * ncolp) is the new-screen pointer.
 */
ecolumn:
	movea.l	ocolp(A4),A2		; nsp = &nscr[col>>1];
	movea.l	ncolp(A4),A3		; osp = &oscr[(col>>1)];

	/* Init workspace for top row */

	move.l	A4,-(A7)	; save Megamax's A4 so we can use it for spread
	lea	spread(A4),A4	; Hmm.  Poor A4.

	bsr	ecget10		; work = spread[get10()];
	move.l	0(A4,D0.w),D6

	move.w	#NROW-2,D7	; for NROW-1 iterations
for3:
		lea	VSB(A2),A2	; osp += VS; faster than adda.l #VSB,A2	

		/* roll workspace down and refill bottom row */
		asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
		and.l	D5,D6

		/* get10 for even columns 2..NCOL-2 */
		/* |               [|XXXXXXXX]       | */
		/* Inline copy of ecget10: the shift is 5 rather than 7
		 * because the result is left multiplied by 4 for the
		 * longword table index. */
		; return( (int)(*(long *)(osp-1) >> 7) & 01777);
		move.l	-2(A2),D0
		lsr.l	#5,D0
		and.w	D3,D0

		or.l	0(A4,D0.w),D6	; .w - table is only 4k long

		/* store */
		/* Inline copy of store; the zero-workspace shortcut
		 * below is commented out, so the lookup always runs. */
				; if (word == 0) {
		;bne.s	else3
		;	clr.b	(A3)		; *nsp = 0;
		;	bra	fi3
else3:				; } else {
			/* newbyte */	; ((lifetab[work>>12] << 4) | lifetab[work & 0777777L])
			/* swap followed by rol.l #4 rotates left by
			 * 20, i.e. right by 12; the mask then drops
			 * the bits that wrapped round. */
			move.l	D6,D1		; D1 := work>>12
			swap	D1
			rol.l	#4,D1
			and.l	D4,D1		; leave only the bottom 18 bits
			move.b	0(A1,D1.l),D0	; D0 := lifetab[D1] << 4
			asl.b	#4,D0
			move.l	D6,D1		; D1 := work & 0777777
			and.l	D4,D1
			or.b	0(A1,D1.l),D0	; D0 |= lifetab[D1]
			/* end of newbyte */

			move.b	D0,(A3)
fi3:		/* end of store */	; }

		lea	VSB(A3),A3	; nsp += VS;
	dbra	D7,for3

	/* Do bottom of column, the byte on the bottom row */
	/* Roll workspace down. Degenerate case - no refill */

	asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
	and.l	D5,D6

	jsr	(A5)		; store()

	/* Claw back A4 so that we can access external variables again */
	move.l	(A7)+,A4

	rts			; end of column()

/* Do an odd column, not the last */
/*
 * Same algorithm as column(), but with the ocget10 and store sequences
 * expanded inline inside the row loop.
 *
 * NOTE: the trailing comments on the two movea.l lines below have the
 * names swapped.  A2 (from ocolp) is the old-screen pointer, A3 (from
 * ncolp) is the new-screen pointer.
 */
ocolumn:
	movea.l	ocolp(A4),A2		; nsp = &nscr[col>>1];
	movea.l	ncolp(A4),A3		; osp = &oscr[(col>>1)];

	/* Init workspace for top row */

	move.l	A4,-(A7)	; save Megamax's A4 so we can use it for spread
	lea	spread(A4),A4	; Hmm.  Poor A4.

	/* NOTE(review): jsr to a label here, where ecolumn uses bsr for
	 * the equivalent call - confirm both are encoded acceptably. */
	jsr	ocget10		; work = spread[get10()];
	move.l	0(A4,D0.w),D6

	move.w	#NROW-2,D7	; for NROW-1 iterations
for4:
		lea	VSB(A2),A2	; osp += VS; faster than adda.l #VSB,A2	

		/* roll workspace down and refill bottom row */
		asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
		and.l	D5,D6
					;	| spread[get10()];
		/* ocget10 */
		/* Inline copy: swap then rol.l #3 rotates left by 19,
		 * i.e. right by 13 = the >>15 of the C form less the 2
		 * bits kept for the longword index. */
		move.l	(A2),D0
		swap	D0	; these two instrs do D0 >>= 15
		rol.l	#3,D0	; 1 to get ] in, 2 to adjust for longword lookup
		and.w	D3,D0

		or.l	0(A4,D0.w),D6	; .w - table is only 4k long

		/* store */
		/* Inline copy of store; the zero-workspace shortcut
		 * below is commented out, so the lookup always runs. */
		;bne.s	else4	; if (work == 0L) {
		;	/* easy one - result is also 0 */
		;	clr.b	(A3)		; *nsp = 0;
		;	bra.s	fi4
else4:				; } else {
			/* newbyte */	; ((lifetab[work>>12] << 4) | lifetab[work & 0777777L])
			move.l	D6,D1		; D1 := work>>12
			swap	D1
			rol.l	#4,D1
			and.l	D4,D1		; leave only the bottom 18 bits
			move.b	0(A1,D1.l),D0	; D0 := lifetab[D1] << 4
			asl.b	#4,D0
			move.l	D6,D1		; D1 := work & 0777777
			and.l	D4,D1
			or.b	0(A1,D1.l),D0	; D0 |= lifetab[D1]
			/* end of newbyte */

			move.b	D0,(A3)
fi4:		/* end of store */	; }

		lea	VSB(A3),A3	; nsp += VS;
	dbra	D7,for4

	/* Do bottom of column, the byte on the bottom row */
	/* Roll workspace down. Degenerate case - no refill */

	asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
	and.l	D5,D6

	jsr	(A5)		; store()

	/* Claw back A4 so that we can access external variables again */
	move.l	(A7)+,A4

	rts			; end of column()

/* look up and store one result byte; reached through A5 for every column */
/*
 * In:  D6 = workspace, A1 = lifetab, A3 = destination byte, D4 = 0777777.
 * Out: byte written to (A3).  Clobbers D0 and D1.
 * The workspace is split into two overlapping 18-bit fields (bits 12-29
 * and bits 0-17); each is looked up in lifetab and the two results are
 * combined as (high << 4) | low.
 */
store:
	; Store is only ever called when the condition codes reflect D6
	;bne.s	fi1		; if (work == 0L) {
	;	clr.b	(A3)		; *nsp = 0;
	;	rts			; return;
fi1:				; }
	/* newbyte */	; ((lifetab[work>>12] << 4) | lifetab[work & 0777777L])
	move.l	D6,D1		; D1 := work>>12
	swap	D1
	rol.l	#4,D1
	and.l	D4,D1		; leave only the bottom 18 bits
	move.b	0(A1,D1.l),D0	; D0 := lifetab[D1] << 4
	asl.b	#4,D0
	move.l	D6,D1		; D1 := work & 0777777
	and.l	D4,D1
	or.b	0(A1,D1.l),D0	; D0 |= lifetab[D1]
	/* end of newbyte */

	move.b	D0,(A3)
	rts

/*
 * The four get10 routines below share one contract: A2 points at the
 * old-screen word for the current row, D3 = 01777<<2, and the result is
 * returned in the low word of D0 as a 10-bit value already multiplied by
 * 4, ready to use as a byte offset into spread[].
 */

/* get10 for the first column */
fcget10:
	; return((*osp >> 7) & 01777);
	move.w	(A2),D0
	lsr.w	#5,D0	; right shift 7, up by 2 to index longword array 
	and.w	D3,D0
	rts

/* get10 for even columns 2..NCOL-2 */
ecget10:
	/* |               [|XXXXXXXX]       | */
	; return( (int)(*(long *)(osp-1) >> 7) & 01777);
	move.l	-2(A2),D0
	lsr.l	#5,D0
	and.w	D3,D0
	rts

/* get10 for odd columns 1..NCOL-3 */
ocget10:
	/* |       [XXXXXXXX|]               | */
	; return( (int)(*(long *)osp >> 15) & 01777 );
	move.l	(A2),D0
	swap	D0	; these two instrs do D0 >>= 15
	rol.l	#3,D0	; 1 to get ] in, 2 to adjust for longword lookup
	and.w	D3,D0
	rts

/* get10 for the final column */
lcget10:
	/* centred about last byte of the line */
	; return((*osp << 1) & 01777);
	move.w	(A2),D0
	lsl.w	#3,D0	; 1 for us, 2 for the longword array index
	and.w	D3,D0
	rts

ret:	/* return from generate() */
}
}
