/*
 *	LIFE: generate.c
 *
 *	Perform one life generation on the screen
 */

/*
 * Screen geometry.  These are substituted textually into the inline
 * assembly in generate() (e.g. "lea VSB(A2),A2", "#NROW-2", "#(NCOL)/2-2"),
 * so they must stay plain numeric literals.
 */
#define NROW 400		/* number of screen pixel rows */
#define VSW 40			/* how many words per line - (int *) increment */
#define VSB 80			/* Vertical Separation in bytes, == VSW*2; row stride used by generate() */
#define NCOL 80			/* number of byte-wide screen *columns* per row (original note: "1/2 screen"); generate() relies on it being even */

/*
 * from tables.c
 *
 * lifetab is indexed in generate() with an 18-bit value (mask 0777777) and
 * two looked-up bytes are merged as (a << 4) | b, so each entry presumably
 * holds a 4-bit result in its low nibble -- TODO confirm against tables.c.
 *
 * spread[] is indexed with a 10-bit value (mask 01777), i.e. 1024 longs.
 */
extern char *lifetab;		/* The Big One */
extern long spread[];		/* spread bits out to every 3 */

/*
 * Word at the top of the column currently being processed, in the new
 * (ncolp) and old (ocolp) screens.  generate() initialises both from its
 * arguments and advances them by one word after each odd column.  They are
 * globals because the inline assembly reaches them as ncolp(A4)/ocolp(A4).
 */
int *ncolp, *ocolp;	/* pointers to top of current column */

/*
 * generate - compute one Life generation from one screen bitmap into another.
 *
 *	oldscr	base of the source (current generation) bitmap; only read.
 *	newscr	base of the destination bitmap; every word of the
 *		NROW x VSB byte area is written.  Presumably it must not
 *		overlap oldscr, since neighbours are still being read from
 *		oldscr while results are stored -- TODO confirm with callers.
 *
 * No return value is set up deliberately (K&R implicit int).
 *
 * Method, as far as the code below shows:
 *  - The screen is processed one byte-wide column at a time, top to bottom.
 *    Even columns are the high byte of a screen word, odd columns the low
 *    byte, so two columns share each word and the column pointers advance
 *    by one word after every odd column.
 *  - For each row a "get10" routine fetches the 8 bits of the byte plus one
 *    neighbouring bit on each side (10 bits), already multiplied by 4 so it
 *    can be used directly as a byte offset into spread[] (array of longs).
 *  - "work" (D6) holds three rows of that 10-bit window, one 3-bit group
 *    (octal digit) per column.  Each step does
 *    work = ((work << 1) & 06666666666) | spread[get10()], which moves every
 *    row up one bit inside its group and clears the low bit of each group
 *    for the incoming row (spread[] presumably sets only those low bits).
 *  - A "store" routine turns work into the 8 new cells with two lifetab
 *    lookups (see newbyte) and writes them into the proper half of the
 *    destination word.  An all-zero work is short-cut to a zero result.
 *  - Cells beyond the screen edges count as dead: the first/last column
 *    get10 variants shift in a zero bit, the top row starts with a
 *    one-row work, and the bottom row is rolled without a refill.
 *
 * The code depends on Megamax conventions visible in the source: A6 is
 * the frame pointer used to reach the arguments, A4 is the base register
 * for globals, and both (plus A5) are borrowed and restored.
 */
generate(oldscr, newscr)
int *oldscr, *newscr;	/* source and destination screen base pointers */
{
#define dbra dbf
asm {
	
	/* Register allocation for the whole routine */
	; A0 - scratch
	; A1 - lifetab		2 ind
	; A2 - osp		1 ind, 1 inc
	; A3 - nsp	2 ind, 1 inc
	; A4 - usually Megamax's for access to static variable.
	;	within column, address of the spread[] array
	; A5 - Store	subr pointer to lookup and store the current byte
	; A6 - Get10	subr pointer to get the next 10 bits off the screen
	; D7 - row loop counter used for dbra
	; D6 - work		3 read, 1 write
	; D5 - constant 06666666666	bottom row of work	1 use
	; D4 - constant 0777777		18 bit mask		1 use
	; D3 - constant 01777<<2	10 bit mask		1 use
	; D2 - constant 0xFF00		upper byte of word	1 every odd row
	; D1 - column loop counter
	/* D1 is also used as scratch by newbyte and ocstore, hence the push/pop in for2 */
	/* D0 carries the get10 result (spread[] byte offset) and the newbyte result */

	; Copy chosen external variables into registers
	movea.l	lifetab(A4),A1

	; Some useful constants
	move.l	#06666666666,D5
	move.l	#0777777,D4
	move.l	#01777<<2,D3
	move.l	#0xFF00,D2

	/* Initialise top of column pointers */
	movea.l	oldscr(A6),A0
	move.l	A0,ocolp(A4)

	movea.l	newscr(A6),A0
	move.l	A0,ncolp(A4)

	/* That was our last use of A6 as frame pointer.  A5 and A6 are saved */
	/* here and restored before leaving, so they can hold Store and Get10. */
	movem.l	A5/A6,-(A7)

	/* Do first column: even column whose left neighbour is off-screen */
	lea	fcget10(PC),A6	; Get10 = fcget10
	lea	ecstore(PC),A5	; Store = ecstore
	bsr	column			; column()

	/* Do columns 1..NCOL-2 */
	/* dbra runs D1+1 = NCOL/2-1 times; each pass does one odd and one */
	/* even column, i.e. NCOL-2 columns in total. */
				; for (col=1; col < NCOL-2; ) {
	move.w	#(NCOL)/2-2,D1	; NCOL-2 iterations (2 per loop)
for2:
		move.w	D1,-(A7)	; push col so we can use D1
		/* NOTE(review): ocolumn and ecolumn call their get10/store code by */
		/* name or inline and never go through A5/A6, so the four lea */
		/* instructions in this loop look redundant. */
		lea	ocget10(PC),A6	; Get10 = ocget10;
		lea	ocstore(PC),A5	; Store = ocstore;
		bsr	ocolumn		; do easy odd column

		/* odd column was the low byte of the word: step to the next word */
		addq.l	#2,ocolp(A4)	; move on to next column of integers
		addq.l	#2,ncolp(A4)

		lea	ecget10(PC),A6	; Get10 = ecget10;
		lea	ecstore(PC),A5	; Store = ecstore;
		jsr	ecolumn		; do easy even column

		move.w	(A7)+,D1	; restore col
dbra	D1,for2		; }

	/* Do last (right hand) column - relies on NCOL being even */
	/* so that we can assume the last one is an odd column. */
	lea	lcget10(PC),A6	; Get10 = lcget10;
	lea	ocstore(PC),A5	; Store = ocstore;
	bsr	column		; column()

	movem.l	(A7)+,A5/A6	; restore megamax's registers
	jmp	ret		; exit generate() function

/* Do a general column, using function pointers to select Store and Get10 fns */
/* Used for first and last columns */
/* In: A6 = Get10, A5 = Store, ocolp/ncolp = top of column.  Uses D0,D1,D6,D7,A2,A3. */
column:
	/* NOTE: the two trailing ; comments below are swapped relative to the code: */
	/* A2 is osp (old screen, from ocolp) and A3 is nsp (new screen, from ncolp). */
	movea.l	ocolp(A4),A2		; nsp = &nscr[col>>1];
	movea.l	ncolp(A4),A3		; osp = &oscr[(col>>1)];

	/* Init workspace for top row */

	move.l	A4,-(A7)	; save Megamax's A4 so we can use it for spread
	lea	spread(A4),A4	; Hmm.  Poor A4.

	jsr	(A6)		; work = spread[get10()];
	move.l	0(A4,D0.w),D6

	/* dbra counts D7 down to -1, so #NROW-2 gives NROW-1 passes; each pass */
	/* reads old row r+1 and stores new row r.  The last row is done after the loop. */
	move.w	#NROW-2,D7	; for NROW-1 iterations
for1:
		lea	VSB(A2),A2	; osp += VS; faster than adda.l #VSB,A2	

		/* roll workspace down and refill bottom row */
		asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
		and.l	D5,D6
		jsr	(A6)		;	| spread[get10()];
		or.l	0(A4,D0.w),D6	; .w - table is only 4k long

		/* Store tests the flags left by the or.l above; jsr leaves them intact */
		jsr	(A5)		; store();

		lea	VSB(A3),A3	; nsp += VS;
	dbra	D7,for1

	/* Do bottom of column, the byte on the bottom row */
	/* Roll workspace down. Degenerate case - no refill */

	asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
	and.l	D5,D6

	jsr	(A5)		; store()

	/* Claw back A4 so that we can access external variables again */
	move.l	(A7)+,A4

	rts			; end of column()

/* Special-case even column, not used for first column of screen */
/* Same as column, but with ecget10 and ecstore expanded inline in the row loop. */
/* The inline copies must be kept in step with the ecget10/ecstore subroutines. */
ecolumn:
	/* NOTE: trailing ; comments swapped here too - A2 is osp, A3 is nsp. */
	movea.l	ocolp(A4),A2		; nsp = &nscr[col>>1];
	movea.l	ncolp(A4),A3		; osp = &oscr[(col>>1)];

	/* Init workspace for top row */

	move.l	A4,-(A7)	; save Megamax's A4 so we can use it for spread
	lea	spread(A4),A4	; Hmm.  Poor A4.

	jsr	ecget10		; work = spread[get10()];
	move.l	0(A4,D0.w),D6

	move.w	#NROW-2,D7	; for NROW-1 iterations
for3:
		lea	VSB(A2),A2	; osp += VS; faster than adda.l #VSB,A2	

		/* roll workspace down and refill bottom row */
		asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
		and.l	D5,D6

		/* get10 for even columns 2..NCOL-2 */
		/* |               [|XXXXXXXX]       | */
		/* shift is 5, not 7: the result stays multiplied by 4 as a spread[] offset */
		; return( (int)(*(long *)(osp-1) >> 7) & 01777);
		move.l	-2(A2),D0
		lsr.l	#5,D0
		and.w	D3,D0

		or.l	0(A4,D0.w),D6	; .w - table is only 4k long

		/* ecstore */
		/* the bne tests the Z flag set by the or.l on work just above */
				; if (word == 0) {
		bne.s	else3
			move.w	#0,(A3)		; *nsp = 0;
			bra	fi3		; return;
else3:	
			/* store Most Significant Byte and clear LSB */
			jsr	newbyte		; *nsp = newbyte << 8;
			asl.w	#8,D0
			move.w	D0,(A3)
fi3:		/* end of ecstore */

		lea	VSB(A3),A3	; nsp += VS;
	dbra	D7,for3

	/* Do bottom of column, the byte on the bottom row */
	/* Roll workspace down. Degenerate case - no refill */

	asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
	and.l	D5,D6

	jsr	ecstore		; store()

	/* Claw back A4 so that we can access external variables again */
	move.l	(A7)+,A4

	rts			; end of column()

/* Do an odd column, not the last */
/* Same as column, but with ocget10 and ocstore expanded inline in the row loop. */
/* The inline copies must be kept in step with the ocget10/ocstore subroutines. */
ocolumn:
	/* NOTE: trailing ; comments swapped here too - A2 is osp, A3 is nsp. */
	movea.l	ocolp(A4),A2		; nsp = &nscr[col>>1];
	movea.l	ncolp(A4),A3		; osp = &oscr[(col>>1)];

	/* Init workspace for top row */

	move.l	A4,-(A7)	; save Megamax's A4 so we can use it for spread
	lea	spread(A4),A4	; Hmm.  Poor A4.

	jsr	ocget10		; work = spread[get10()];
	move.l	0(A4,D0.w),D6

	move.w	#NROW-2,D7	; for NROW-1 iterations
for4:
		lea	VSB(A2),A2	; osp += VS; faster than adda.l #VSB,A2	

		/* roll workspace down and refill bottom row */
		asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
		and.l	D5,D6
					;	| spread[get10()];
		/* ocget10 */
		move.l	(A2),D0
		swap	D0	; these two instrs do D0 >>= 15
		rol.l	#3,D0	; 1 to get ] in, 2 to adjust for longword lookup
		and.w	D3,D0

		or.l	0(A4,D0.w),D6	; .w - table is only 4k long

		/* ocstore */
		/* the bne tests the Z flag set by the or.l on work just above */
		bne.s	else4
			/* easy one - result is also 0 */
			and.w	D2,(A3)		; *nsp &= 0xFF00;
			bra.s	fi4
else4:				; }
		/* keep the high byte already stored by the even column, replace the low byte */
		jsr	newbyte		; *nsp = (*nsp & 0xFF00) | newbyte;
		move.w	(A3),D1
		and.w	D2,D1
		or.w	D1,D0
		move.w	D0,(A3)
fi4:		/* end of ocstore */

		lea	VSB(A3),A3	; nsp += VS;
	dbra	D7,for4

	/* Do bottom of column, the byte on the bottom row */
	/* Roll workspace down. Degenerate case - no refill */

	asl.l	#1,D6		; work = ((work<<1) & 06666666666L)
	and.l	D5,D6

	jsr	ocstore		; store()

	/* Claw back A4 so that we can access external variables again */
	move.l	(A7)+,A4

	rts			; end of column()

/* look up and store byte in an even column */
/* In: D6 = work with the condition codes still reflecting it, A3 = nsp. */
/* Writes the whole word: new byte in the high half, low half cleared. */
ecstore:
	; Store is only ever called when the condition codes reflect D6
	; tst.l	D6		; if (work == 0L) {
	bne.s	fi1
		move.w	#0,(A3)		; *nsp = 0;
		rts			; return;
fi1:				; }

	/* store Most Significant Byte and clear LSB */
	jsr	newbyte		; *nsp = newbyte << 8;
	asl.w	#8,D0
	move.w	D0,(A3)
	rts

/* look up and write byte in an odd column */
/* In: D6 = work with the condition codes still reflecting it, A3 = nsp. */
/* Replaces only the low byte of the word; the high byte is kept.  Clobbers D1. */
ocstore:	
	; tst.l	D6		; if (work == 0L) {
	bne.s	fi2
		/* easy one - result is also 0 */
		and.w	D2,(A3)		; *nsp &= 0xFF00;
		rts			; return;
fi2:				; }
	jsr	newbyte		; *nsp = (*nsp & 0xFF00) | newbyte;
	move.w	(A3),D1
	and.w	D2,D1
	or.w	D1,D0
	move.w	D0,(A3)

	rts

/* Code to look the new byte up into D0 */
/* In: D6 = work, A1 = lifetab, D4 = 0777777.  Out: D0 = new byte, upper bits zero. */
/* The high 18 bits of work give the upper nibble, the low 18 bits the lower nibble. */
/* Clobbers D1. */
newbyte:	; ((lifetab[work>>12] << 4) | lifetab[work & 0777777L])
	moveq	#0,D0		; Clear top bits

	/* swap (rotate 16) then rol 4 is a net rotate right by 12 */
	move.l	D6,D1		; D1 := work>>12
	swap	D1
	rol.l	#4,D1
	and.l	D4,D1		; leave only the bottom 18 bits
	move.b	0(A1,D1.l),D0	; D0 := lifetab[D1] << 4
	asl.b	#4,D0
	move.l	D6,D1		; D1 := work & 0777777
	and.l	D4,D1
	or.b	0(A1,D1.l),D0	; D0 |= lifetab[D1]
	
	rts

/* All get10 variants: In A2 = osp.  Out D0.w = 10-bit window * 4 (masked by D3). */

/* get10 for the first column */
/* high byte of the word plus one bit to its right; the left neighbour bit is 0 */
fcget10:
	; return((*osp >> 7) & 01777);
	move.w	(A2),D0
	lsr.w	#5,D0	; right shift 7, up by 2 to index longword array 
	and.w	D3,D0
	rts

/* get10 for even columns 2..NCOL-2 */
/* reads the long starting one word before osp to pick up the left neighbour bit */
ecget10:
	/* |               [|XXXXXXXX]       | */
	; return( (int)(*(long *)(osp-1) >> 7) & 01777);
	move.l	-2(A2),D0
	lsr.l	#5,D0
	and.w	D3,D0
	rts

/* get10 for odd columns 1..NCOL-3 */
/* reads the long at osp to pick up the right neighbour bit from the next word */
ocget10:
	/* |       [XXXXXXXX|]               | */
	; return( (int)(*(long *)osp >> 15) & 01777 );
	move.l	(A2),D0
	swap	D0	; these two instrs do D0 >>= 15
	rol.l	#3,D0	; 1 to get ] in, 2 to adjust for longword lookup
	and.w	D3,D0
	rts

/* get10 for the final column */
/* low byte of the word plus one bit to its left; the right neighbour bit is 0 */
lcget10:
	/* centred about last byte of the line */
	; return((*osp << 1) & 01777);
	move.w	(A2),D0
	lsl.w	#3,D0	; 1 for us, 2 for the longword array index
	and.w	D3,D0
	rts

ret:	/* return from generate(): control leaves the asm block here */
}
}
