Chronicles of a Restless Mind

Friday, 2013-11-01

The Proto Interpreter for J

Some abbreviations to support the terse style:

typedef char C;typedef long I;
#define P printf
#define R return

The Array data type:

typedef struct a{
	I t,r,d[3],p[2];
}*A;

Function prototypes (in K&R style?) for monadic and dyadic “verbs”. That's f(A w) and f(A a, A w).

#define V1(f) A f(w)A w;
#define V2(f) A f(a,w)A a,w;

Do x n times.

#define DO(n,x) {I i=0,_n=(n);for(;i<_n;++i){x;}}

Allocate memory (assuming 4-byte integers), and copy n integers from destination to source.

I *ma(n) { R(I*)malloc(n*4); }
mv(d,s,n) I *d,*s;{
	DO(n,d[i]=s[i]);
}

Get the size of an array (number of integers), multiplying the r dimensions from d. I have no idea why it's called tr.

tr(r,d)I *d;{
	I z=1;
	DO(r,z=z*d[i]);
	R z;
}

Get Array”—allocate a new A from the r dimensions at d, setting its t, r, and d parameters to the arguments given. Note that the allocation size ignores the p[2] field of the A structure.

Note also that tr(0,0) = 1, so this always gives an array with at least one value. It uses 0-dimensional values for scalars, so this is important.

A ga(t,r,d)I *d;{
	A z=(A)ma(5+tr(r,d));
	z->t=t, z->r=r, mv(z->d,d,r);
	R z;
}

V1(iota) declares a function iota(A w). This treats w as a single integer and returns an array whose values are the sequence 0..n-1. Note that the i there is the loop index provided by the DO macro.

V1(iota) {
	I n=*w->p;
	A z=ga(0,1,&n);
	DO(n,z->p[i]=i);
	R z;
}

Add two arrays element by element, allocating a new array for the result. Remember the V2 prototype: this is plus(a, w).

V2(plus) {
	I r=w->r, *d=w->d, n=tr(r,d);
	A z=ga(0,r,d);
	DO(n, z->p[i]=a->p[i]+w->p[i]);
	R z;
}

Index into an array: return w[a], treating a as a single integer. Basically it drops the first dimension of w and uses the remaining dimensions to compute the size of the new array.

V2(from) {
	I r=w->r-1, *d=w->d+1, n=tr(r,d);
	A z=ga(w->t,r,d);
	mv(z->p, w->p + (n * *a->p), n);
	R z;
}

Return a scalar pointer to w

V1(box) {
	A z=ga(1,0,0);
	*z->p=(I)w;
	R z;
}

Concatenate a and w, returning a one-dimensional array. Note that it uses the t from the second argument, but you should presumably never use it on arrays with different types, so it shouldn't be a problem.

V2(cat) {
	I an=tr(a->r,a->d), wn=tr(w->r,w->d), n=an+wn;
	A z=ga(w->t,1,&n);
	mv(z->p,a->p,an);
	mv(z->p+an,w->p,wn);
	R z;
}

Just unimplemented, presumably. A quick trawl through the J docs didn't turn up anything that looked like it would particularly fit this...

V2(find){}

Reshape array. The first parameter, a, gives dimensions (a scalar or a one-dimensional array) for a new array. The new array is initialized from the contents of w, which are repeated as necessary to fill the space.

V2(rsh) {
	I r=a->r?*a->d:1, n=tr(r,a->p), wn=tr(w->r,w->d);
	A z=ga(w->t, r, a->p);
	mv(z->p, w->p, wn=n>wn?wn:n);
	if(n-=wn) mv(z->p+wn,z->p,n);
	R z;
}

Get the dimensions (that is, the shape) of an array.

V1(sha) {
	A z=ga(0,1,&w->r);
	mv(z->p,w->d,w->r);
	R z;
}

The identity function, obviously.

V1(id) { R w; }

The size of an array (number of elements, i.e. the first dimension). This gives 1 for a scalar (instead of 0), but should otherwise be the same as 0{#x (the 0th element of sha(w)).

V1(size) {
	A z=ga(0,0,0);
	*z->p=w->r?*w->d:1;
	R z;
}

Print integers, newlines, and arrays. If t is set, the values are arrays and we print them recursively, otherwise we print them as integers.

pi(i) { P("%d ",i); }
nl() { P("\n"); }
pr(w) A w; {
	I r=w->r,*d=w->d, n=tr(r,d);
	DO(r,pi(d[i]));
	nl();
	if(w->t) DO(n, P("< "); pr(w->p[i]))
	else DO(n, pi(w->p[i]));
	nl();
}

The verb table (single-character names) and the function tables giving the dyadic and monadic behaviors for each verb.

C vt[]="+{~<#,";
A(*vd[])()={0,plus,from,find,0,rsh,cat},
 (*vm[])()={0,id,size,iota,box,sha,0};

The state table holds the values of the variables (‘a’ through ‘z’)

I st[26];

Is the token a variable name? Might be an abbreviation for “question-pronoun”?

qp(a) { R a>='a' && a<='z'; }

Is the token a verb? This is an extremely sloppy check, as we will then use the value to index into the vm table (which has 7 elements), but most of this code does no checking at all, so that's not surprising.

qv(a) { R a<'a'; }

Take a tokenized string which represents an expression and execute it recursively. Returns an array result.

We split the string, giving the value of the first token and the tail of the expression.

If value is a variable name, then (if the next token is ‘=’) we set the variable to ex(tail), otherwise we resolve the name to its value and continue.

Now, if the value is an verb, then do vm[value](ex(tail)). Otherwise, we look at the next token (call it verb) and the new tail. If verb is null, return value, otherwise do vd[verb](value, ex(tail)).

A ex(e) I *e; {
	I a=*e;
	if(qp(a)) {
		if(e[1]=='=') R st[a-'a']=ex(e+2);
		a=st[ a-'a'];
	}
	R qv(a) ?
		(*vm[a])(ex(e+1)) :
		e[1] ? (*vd[e[1]])(a,ex(e+2)) : (A)a;
}

When tokenizing a string, a noun is a scalar A from 0 to 9. A verb is a valid operator character (as a 1-based index into the function tables). Other characters are passed through unchanged (to be used as variable names).

noun(c) {
	A z;
	if(c<'0'||c>'9') R 0;
	z=ga(0,0,0);
	*z->p=c-'0';
	R z;
}

verb(c) {
	I i=0;
	for(;vt[i];) if(vt[i++]==c) R i;
	R 0;
}

I *wd(s)C *s; {
	I a, n=strlen(s), *e=ma(n+1);
	C c;
	DO(n, e[i] = (a=noun(c=s[i])) ? a : (a=verb(c))?a:c );
	e[n]=0;
	R e;
}

Then main is the obvious read/tokenize/execute/print loop.

main() {
	C s[99];
	while(gets(s)) pr(ex(wd(s)));
}

--Josh Grams <josh@qualdan.com>