/*
 *	mandelbrot fractal generating function.
 *	Fixed point version in 68000 assembly language for megamax c compiler
 *
 *	Martin Guy, UKC, February 1987.
 */

typedef	int	void;			/* for Megamax */

#define	MAX_W		640		/* Max. screen width for ANY mode */

extern int sc_piw, sc_pih, n_cols, st_rez;	/* Assorted screen attributes */

/*
 *	generate:
 *		Actually do the work of generating the Mandelbrot set.
 *	imports:
 *		sc_piw, sc_pih	width and height of output image
 *		n_cols		number of colours available
 *	exports:
 *		calls plot(x, y, col) to plot pixels where
 *				x = 0..sc_piw-1, y = 0..sc_pih-1
 *				col = 0..n_cols-1
 */

void
generate(cent_r, cent_i, ext_r, ext_i, maxiter)
double cent_r, cent_i;	/* centre of window on complex plane */
double ext_r, ext_i;	/* extent in the four directions (window width and height = 2*ext) */
int maxiter;		/* maximum number of iterations before assuming point is inside set */
{
	/*
	 *	In the absence of a floating point co-processor,
	 *	do all calculations in fixed point arithmetic.
	 *	Test should be |z| > 4 but can save a bit if we use |z| >= 4.
	 *	This does injustice to one point at (-2,0).
	 *
	 *	main loop:
	 *	if (zr2 + zi2 >= 4.0) break;
	 *		|z| < 4.0, zr2 < 4.0, zi2 < 4.0
	 *		-2.0 < zr < 2.0, -2.0 < zi < 2.0
	 *		-2.0 < zr * zi < 2.0
	 *	zi = 2 * zr * zi + ci;
	 *		-6.0 < zi < 6.0		maybe a bit tighter
	 *	zr = zr2 - zi2 + cr;
	 *		-6.0 < zr < 6.0		maybe a bit tighter
	 *	zr2 = zr * zr;
	 *		0 <= zr2 < 36.0
	 *	zi2 = zi * zi;
	 *		0 <= zi2 < 36.0
	 *	
	 *	Format of fixed point words (for 32 bit word):
	 *	s: 1 sign bit, 3 bits for whole numbers, 28 binary places.
	 *
	 *	Strategy for multiplying 2 fixed point numbers:
	 *	shift both down by 16 bits (->12bp)
	 *	Do signed multiply, giving number with 24 binary places
	 *	adjust back to 28bp using a shift (which preserves sign)
	 *
	 *	Note that we don't bother to adjust the results of some
	 *	multiplies until we need to, and compensate for the error
	 *	in the meanwhile. This also means that intermediate results
	 *	only go as high as 6 instead of 36, allowing 2 more bits of
	 *	precision.
	 */
	typedef long fixed;		/* our fixed-point type */
	/* number of binary places in word. Must be even. */
#	define BP (sizeof(fixed) * 8 - 4)
#	define ONE ((fixed)1 << BP)	/* where the binary point is */
#	define FIX(x)	((fixed)((x) * ONE)) /* turn double in to fixed */

	/* some need to be static so that asm can get at them through A4 */
	/* (Dunno how to get at locals and args directly with Megamax) */
	static fixed crs[MAX_W];	/* precomputed real starting values */
	static int line[MAX_W];		/* output colour values for line */ 
	static fixed ci;		/* starting value for this row */
	double xscale, yscale;		/* remove constant calcs from loop */
	register int x;			/* position on screen */
	int y;
	static int maxcolour;		/* maximum colour value */
	static int Maxiter;		/* static copy for asm */

	maxcolour = n_cols - 1;
	Maxiter = maxiter;

	/* precompute some loop constants */
	xscale = 2 * ext_r / (sc_piw - 1);
	yscale = 2 * ext_i / (sc_pih - 1);

	/* precompute real starting values */
	for (x=0; x < sc_piw; x++) {
		crs[x] = FIX((cent_r - ext_r) + x * xscale);
	}

	for (y=0; y < sc_pih; y++) {
		ci = FIX((cent_i - ext_i) + y * yscale);
			/*
			 *	D0	scratch
			 *	D1,D2	zr,zi
			 *	D3,D4	zr2,zi2
			 *	D5,D6	cr,ci
			 *	D7	niter
			 *	The following 2 index the last one we did
			 *	to facilitate end-testing.
			 *	A0	FIX(4.0) for quick test
			 *	A1	indexes output line
			 *	A2	points to input line or crs.
			 *	A3	&crs[0] to detect xloop termination
			 */
		/* x loop counts down from sc_piw-1 to 0 using A2 */
		asm {
			move.l	ci(A4),D6	; get ci

			move.w	sc_piw(A4),D0	; initialise x-loop counters
			asl.w	#1,D0		; line is an array of shorts
			lea.l	line(A4),A1
			lea.l	0(A1,D0.w),A1	; point A1 at last+1 of line
			asl.w	#1,D0		; but crs is array of long
			lea.l	crs(A4),A2
			movea.l	A2,A3		; store terminating value
			lea.l	0(A2,D0.w),A2	; point A2 past end of crs
			movea.l	#0x4000000,A0	; FIX(4) for quick test
						; unadjusted, with 24bp.
xloop:
			move.l	-(A2),D5	; cr = crs[x]

			move.l	D5,D1		; zr = cr
			move.l	D6,D2		; zi = ci

			move.w	Maxiter(A4),D7	; niter = Maxiter;
			bra	iterate
iloop:						; D1, D2 already swapped
			muls	D1,D2		; zi = zr * zi
			asl.l	#5,D2		; adjust and * 2
			add.l	D6,D2		; + ci

			move.l	D3,D1		; zr = zr2
			sub.l	D4,D1		; - zi2
			asl.l	#4,D1		; belated adjustment for D3,D4
			add.l	D5,D1		; + cr
iterate:
			swap	D1
			move.l	D1,D3
			muls	D3,D3		; zr2 = zr * zr

			swap	D2
			move.w	D2,D4
			muls	D4,D4		; zi2 = zi * zi
						; D3 and D4 not adjusted yet
			move.l	D3,D0		; if (zr2
			add.l	D4,D0		; +zi2
			cmp.l	A0,D0		; >= FIX(4)
			dbhi	D7,iloop	; || niter==maxiter) break;
enditer:
			; D7 is -1 if niter ran out before it went outside 4.0
			; D7 is Maxiter..0 if it went outside.
			; D7==Maxiter means it was already outside.
			; D7==0 means it went outside on the last iteration.
			; We want the inside to be black (ncols-1) and the
			; colour just outside the set to be not-black.
			and.w	maxcolour(A4),D7; colour mapping function
			move.w	D7,-(A1)	; store in line[x]
			cmpa.l	A2,A3		; end of x-loop?
			bne	xloop
		}

		for (x=sc_piw; --x >= 0; ) {
			plot(x, y, line[x]);
		}
	}
}
