minix/commands/i386/gas2ack/parse_gnu.c

/*	parse_ack.c - parse GNU assembly		Author: R.S. Veldema
 *							 <rveldema@cs.vu.nl>
 *								26 Aug 1996
 */
#define nil 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <assert.h>
#include "asmconv.h"
#include "token.h"
#include "asm86.h"
#include "languages.h"
#include "globals.h"

typedef struct mnemonic {	/* GNU as86 mnemonics translation table. */
	char		*name;
	opcode_t	opcode;
	optype_t	optype;
} mnemonic_t;

static mnemonic_t mnemtab[] = {			/* This array is sorted. */
	{ ".align",	DOT_ALIGN,	PSEUDO },
	{ ".ascii",	DOT_ASCII,	PSEUDO },
	{ ".asciz",	DOT_ASCIZ,	PSEUDO },
	{ ".assert",	DOT_ASSERT,	PSEUDO },
	{ ".balign",	DOT_ALIGN,	PSEUDO },
	{ ".base",	DOT_BASE,	PSEUDO },
	{ ".bss",	DOT_BSS,	PSEUDO },
	{ ".byte",	DOT_DATA1,	PSEUDO },
	{ ".comm",	DOT_COMM,	PSEUDO },
	{ ".data",	DOT_DATA,	PSEUDO },
	{ ".data1",	DOT_DATA1,	PSEUDO },
	{ ".data2",	DOT_DATA2,	PSEUDO },
	{ ".data4",	DOT_DATA4,	PSEUDO },
	{ ".end",	DOT_END,	PSEUDO },
	{ ".extern",	DOT_EXTERN,	PSEUDO },
	{ ".file",	DOT_FILE,	PSEUDO },
	{ ".global",	DOT_DEFINE,	PSEUDO },
	{ ".globl",	DOT_DEFINE,	PSEUDO },
	{ ".lcomm",	DOT_LCOMM,	PSEUDO },
	{ ".line",	DOT_LINE,	PSEUDO },
	{ ".list",	DOT_LIST,	PSEUDO },
	{ ".long",	DOT_DATA4,	PSEUDO },
	{ ".nolist",	DOT_NOLIST,	PSEUDO },
	{ ".rom",	DOT_ROM,	PSEUDO },
	{ ".short",	DOT_DATA2,	PSEUDO },
	{ ".space",	DOT_SPACE,	PSEUDO },
	{ ".symb",	DOT_SYMB,	PSEUDO },
	{ ".text",	DOT_TEXT,	PSEUDO },
	{ ".word",	DOT_DATA2,	PSEUDO },
	{ "aaa",	AAA,		WORD },
	{ "aad",	AAD,		WORD },
	{ "aam",	AAM,		WORD },
	{ "aas",	AAS,		WORD },
	{ "adcb",	ADC,		BYTE },
	{ "adcl",	ADC,		WORD },
	{ "adcw",	ADC,		OWORD },
	{ "addb",	ADD,		BYTE },
	{ "addl",	ADD,		WORD },
	{ "addw",	ADD,		OWORD },
	{ "andb",	AND,		BYTE },
	{ "andl",	AND,		WORD },
	{ "andw",	AND,		OWORD },
	{ "arpl",	ARPL,		WORD },
	{ "bound",	BOUND,		WORD },
	{ "bsf",	BSF,		WORD },
	{ "bsr",	BSR,		WORD },
	{ "bswap",	BSWAP,		WORD },
	{ "btc",	BTC,		WORD },
	{ "btl",	BT,		WORD },
	{ "btr",	BTR,		WORD },
	{ "bts",	BTS,		WORD },
	{ "btw",	BT,		OWORD },
	{ "call",	CALL,		JUMP },
	{ "callf",	CALLF,		JUMP },
	{ "cbtw",	CBW,		OWORD },
	{ "cbw",	CBW,		WORD },
	{ "cdq",	CWD,		WORD },
	{ "clc",	CLC,		WORD },
	{ "cld",	CLD,		WORD },
	{ "cli",	CLI,		WORD },
	{ "cltd",	CWD,		WORD },
	{ "clts",	CLTS,		WORD },
	{ "cmc",	CMC,		WORD },
	{ "cmpb",	CMP,		BYTE },
	{ "cmpl",	CMP,		WORD },
	{ "cmps",	CMPS,		WORD },
	{ "cmpsb",	CMPS,		BYTE },
	{ "cmpsl",	CMPS,		OWORD },
	{ "cmpsw",	CMPS,		WORD },
	{ "cmpw",	CMP,		OWORD },
	{ "cmpxchg",	CMPXCHG,	WORD },
	{ "cpuid",	CPUID,		WORD },
	{ "cwd",	CWD,		WORD },
	{ "cwde",	CBW,		WORD },
	{ "cwtd",	CWD,		OWORD },
	{ "cwtl",	CBW,		WORD },
	{ "daa",	DAA,		WORD },
	{ "das",	DAS,		WORD },
	{ "decb",	DEC,		BYTE },
	{ "decl",	DEC,		WORD },
	{ "decw",	DEC,		OWORD },
	{ "divb",	DIV,		BYTE },
	{ "divl",	DIV,		WORD },
	{ "divw",	DIV,		OWORD },
	{ "enter",	ENTER,		WORD },
	{ "f2xm1",	F2XM1,		WORD },
	{ "fabs",	FABS,		WORD },
	{ "fadd",	FADD,		WORD },
	{ "faddd",	FADDD,		WORD },
	{ "faddp",	FADDP,		WORD },
	{ "fadds",	FADDS,		WORD },
	{ "fbld",	FBLD,		WORD },
	{ "fbstp",	FBSTP,		WORD },
	{ "fchs",	FCHS,		WORD },
	{ "fcomd",	FCOMD,		WORD },
	{ "fcompd",	FCOMPD,		WORD },
	{ "fcompp",	FCOMPP,		WORD },
	{ "fcomps",	FCOMPS,		WORD },
	{ "fcoms",	FCOMS,		WORD },
	{ "fcos",	FCOS,		WORD },
	{ "fdecstp",	FDECSTP,	WORD },
	{ "fdivd",	FDIVD,		WORD },
	{ "fdivp",	FDIVP,		WORD },
	{ "fdivrd",	FDIVRD,		WORD },
	{ "fdivrp",	FDIVRP,		WORD },
	{ "fdivrs",	FDIVRS,		WORD },
	{ "fdivs",	FDIVS,		WORD },
	{ "ffree",	FFREE,		WORD },
	{ "fiaddl",	FIADDL,		WORD },
	{ "fiadds",	FIADDS,		WORD },
	{ "ficom",	FICOM,		WORD },
	{ "ficomp",	FICOMP,		WORD },
	{ "fidivl",	FIDIVL,		WORD },
	{ "fidivrl",	FIDIVRL,	WORD },
	{ "fidivrs",	FIDIVRS,	WORD },
	{ "fidivs",	FIDIVS,		WORD },
	{ "fildl",	FILDL,		WORD },
	{ "fildq",	FILDQ,		WORD },
	{ "filds",	FILDS,		WORD },
	{ "fimull",	FIMULL,		WORD },
	{ "fimuls",	FIMULS,		WORD },
	{ "fincstp",	FINCSTP,	WORD },
	{ "fistl",	FISTL,		WORD },
	{ "fistp",	FISTP,		WORD },
	{ "fists",	FISTS,		WORD },
	{ "fisubl",	FISUBL,		WORD },
	{ "fisubrl",	FISUBRL,	WORD },
	{ "fisubrs",	FISUBRS,	WORD },
	{ "fisubs",	FISUBS,		WORD },
	{ "fld1",	FLD1,		WORD },
	{ "fldcw",	FLDCW,		WORD },
	{ "fldd",	FLDD,		WORD },
	{ "fldenv",	FLDENV,		WORD },
	{ "fldl2e",	FLDL2E,		WORD },
	{ "fldl2t",	FLDL2T,		WORD },
	{ "fldlg2",	FLDLG2,		WORD },
	{ "fldln2",	FLDLN2,		WORD },
	{ "fldpi",	FLDPI,		WORD },
	{ "flds",	FLDS,		WORD },
	{ "fldx",	FLDX,		WORD },
	{ "fldz",	FLDZ,		WORD },
	{ "fmuld",	FMULD,		WORD },
	{ "fmulp",	FMULP,		WORD },
	{ "fmuls",	FMULS,		WORD },
	{ "fnclex",	FCLEX,		WORD },
	{ "fninit",	FINIT,		WORD },
	{ "fnop",	FNOP,		WORD },
	{ "fnsave",	FSAVE,		WORD },
	{ "fnstcw",	FSTCW,		WORD },
	{ "fnstenv",	FSTENV,		WORD },
	{ "fpatan",	FPATAN,		WORD },
	{ "fprem",	FPREM,		WORD },
	{ "fprem1",	FPREM1,		WORD },
	{ "fptan",	FPTAN,		WORD },
	{ "frndint",	FRNDINT,	WORD },
	{ "frstor",	FRSTOR,		WORD },
	{ "fscale",	FSCALE,		WORD },
	{ "fsin",	FSIN,		WORD },
	{ "fsincos",	FSINCOS,	WORD },
	{ "fsqrt",	FSQRT,		WORD },
	{ "fstd",	FSTD,		WORD },
	{ "fstpd",	FSTPD,		WORD },
	{ "fstps",	FSTPS,		WORD },
	{ "fstpx",	FSTPX,		WORD },
	{ "fsts",	FSTS,		WORD },
	{ "fstsw",	FSTSW,		WORD },
	{ "fsubd",	FSUBD,		WORD },
	{ "fsubp",	FSUBP,		WORD },
	{ "fsubpr",	FSUBPR,		WORD },
	{ "fsubrd",	FSUBRD,		WORD },
	{ "fsubrs",	FSUBRS,		WORD },
	{ "fsubs",	FSUBS,		WORD },
	{ "ftst",	FTST,		WORD },
	{ "fucom",	FUCOM,		WORD },
	{ "fucomp",	FUCOMP,		WORD },
	{ "fucompp",	FUCOMPP,	WORD },
	{ "fxam",	FXAM,		WORD },
	{ "fxch",	FXCH,		WORD },
	{ "fxtract",	FXTRACT,	WORD },
	{ "fyl2x",	FYL2X,		WORD },
	{ "fyl2xp1",	FYL2XP1,	WORD },
	{ "hlt",	HLT,		WORD },
	{ "idivb",	IDIV,		BYTE },
	{ "idivl",	IDIV,		WORD },
	{ "idivw",	IDIV,		OWORD },
	{ "imulb",	IMUL,		BYTE },
	{ "imull",	IMUL,		WORD },
	{ "imulw",	IMUL,		OWORD },
	{ "inb",	IN,		BYTE },
	{ "incb",	INC,		BYTE },
	{ "incl",	INC,		WORD },
	{ "incw",	INC,		OWORD },
	{ "inl",	IN,		WORD },
	{ "insb",	INS,		BYTE },
	{ "insl",	INS,		WORD },
	{ "insw",	INS,		OWORD },
	{ "int",	INT,		WORD },
	{ "into",	INTO,		JUMP },
	{ "invd",	INVD,		WORD },
	{ "invlpg",	INVLPG,		WORD },
	{ "inw",	IN,		OWORD },
	{ "iret",	IRET,		JUMP },
	{ "iretd",	IRETD,		JUMP },
	{ "ja",		JA,		JUMP },
	{ "jae",	JAE,		JUMP },
	{ "jb",		JB,		JUMP },
	{ "jbe",	JBE,		JUMP },
	{ "jc",		JB,		JUMP },
	{ "jcxz",	JCXZ,		JUMP },
	{ "je",		JE,		JUMP },
	{ "jecxz",	JCXZ,		JUMP },
	{ "jg",		JG,		JUMP },
	{ "jge",	JGE,		JUMP },
	{ "jl",		JL,		JUMP },
	{ "jle",	JLE,		JUMP },
	{ "jmp",	JMP,		JUMP },
	{ "jmpf",	JMPF,		JUMP },
	{ "jna",	JBE,		JUMP },
	{ "jnae",	JB,		JUMP },
	{ "jnb",	JAE,		JUMP },
	{ "jnbe",	JA,		JUMP },
	{ "jnc",	JAE,		JUMP },
	{ "jne",	JNE,		JUMP },
	{ "jng",	JLE,		JUMP },
	{ "jnge",	JL,		JUMP },
	{ "jnl",	JGE,		JUMP },
	{ "jnle",	JG,		JUMP },
	{ "jno",	JNO,		JUMP },
	{ "jnp",	JNP,		JUMP },
	{ "jns",	JNS,		JUMP },
	{ "jnz",	JNE,		JUMP },
	{ "jo",		JO,		JUMP },
	{ "jp",		JP,		JUMP },
	{ "js",		JS,		JUMP },
	{ "jz",		JE,		JUMP },
	{ "lahf",	LAHF,		WORD },
	{ "lar",	LAR,		WORD },
	{ "lds",	LDS,		WORD },
	{ "leal",	LEA,		WORD },
	{ "leave",	LEAVE,		WORD },
	{ "leaw",	LEA,		OWORD },
	{ "les",	LES,		WORD },
	{ "lfs",	LFS,		WORD },
	{ "lgdt",	LGDT,		WORD },
	{ "lgs",	LGS,		WORD },
	{ "lidt",	LIDT,		WORD },
	{ "ljmp",	JMPF,		JUMP },
	{ "ljmpw",	JMPF,		JUMP16 },
	{ "lldt",	LLDT,		WORD },
	{ "lmsw",	LMSW,		WORD },
	{ "lock",	LOCK,		WORD },
	{ "lods",	LODS,		WORD },
	{ "lodsb",	LODS,		BYTE },
	{ "loop",	LOOP,		JUMP },
	{ "loope",	LOOPE,		JUMP },
	{ "loopne",	LOOPNE,		JUMP },
	{ "loopnz",	LOOPNE,		JUMP },
	{ "loopz",	LOOPE,		JUMP },
	{ "lretw",	RETF,		JUMP16 },
	{ "lsl",	LSL,		WORD },
	{ "lss",	LSS,		WORD },
	{ "ltr",	LTR,		WORD },
	{ "movb",	MOV,		BYTE },
	{ "movl",	MOV,		WORD },
	{ "movsb",	MOVS,		BYTE },
	{ "movsbl",	MOVSXB,		WORD },
	{ "movsbw",	MOVSXB,		OWORD },
	{ "movsl",	MOVS,		WORD },
	{ "movsw",	MOVS,		OWORD },
	{ "movswl",	MOVSX,		WORD },
	{ "movw",	MOV,		OWORD },
	{ "movzbl",	MOVZXB,		WORD },
	{ "movzbw",	MOVZXB,		OWORD },
	{ "movzwl",	MOVZX,		WORD },
	{ "mulb",	MUL,		BYTE },
	{ "mull",	MUL,		WORD },
	{ "mulw",	MUL,		OWORD },
	{ "negb",	NEG,		BYTE },
	{ "negl",	NEG,		WORD },
	{ "negw",	NEG,		OWORD },
	{ "nop",	NOP,		WORD },
	{ "notb",	NOT,		BYTE },
	{ "notl",	NOT,		WORD },
	{ "notw",	NOT,		OWORD },
	{ "orb",	OR,		BYTE },
	{ "orl",	OR,		WORD },
	{ "orw",	OR,		OWORD },
	{ "outb",	OUT,		BYTE },
	{ "outl",	OUT,		WORD },
	{ "outsb",	OUTS,		BYTE },
	{ "outsl",	OUTS,		WORD },
	{ "outsw",	OUTS,		OWORD },
	{ "outw",	OUT,		OWORD },
	{ "pop",	POP,		WORD },
	{ "popa",	POPA,		WORD },
	{ "popal",	POPAD,		WORD },
	{ "popf",	POPF,		WORD },
	{ "popfl",	POPF,		WORD },
	{ "popl",	POP,		WORD },
	{ "popw",	POP,		OWORD },
	{ "push",	PUSH,		WORD },
	{ "pusha",	PUSHA,		WORD },
	{ "pushal",	PUSHAD,		WORD },
	{ "pushf",	PUSHF,		WORD },
	{ "pushl",	PUSH,		WORD },
	{ "pushw",	PUSH,		OWORD },
	{ "rclb",	RCL,		BYTE },
	{ "rcll",	RCL,		WORD },
	{ "rclw",	RCL,		OWORD },
	{ "rcrb",	RCR,		BYTE },
	{ "rcrl",	RCR,		WORD },
	{ "rcrw",	RCR,		OWORD },
	{ "ret",	RET,		JUMP },
	{ "retf",	RETF,		JUMP },
	{ "rolb",	ROL,		BYTE },
	{ "roll",	ROL,		WORD },
	{ "rolw",	ROL,		OWORD },
	{ "rorb",	ROR,		BYTE },
	{ "rorl",	ROR,		WORD },
	{ "rorw",	ROR,		OWORD },
	{ "sahf",	SAHF,		WORD },
	{ "salb",	SAL,		BYTE },
	{ "sall",	SAL,		WORD },
	{ "salw",	SAL,		OWORD },
	{ "sarb",	SAR,		BYTE },
	{ "sarl",	SAR,		WORD },
	{ "sarw",	SAR,		OWORD },
	{ "sbbb",	SBB,		BYTE },
	{ "sbbl",	SBB,		WORD },
	{ "sbbw",	SBB,		OWORD },
	{ "scasb",	SCAS,		BYTE },
	{ "scasl",	SCAS,		WORD },
	{ "scasw",	SCAS,		OWORD },
	{ "seta",	SETA,		BYTE },
	{ "setae",	SETAE,		BYTE },
	{ "setb",	SETB,		BYTE },
	{ "setbe",	SETBE,		BYTE },
	{ "sete",	SETE,		BYTE },
	{ "setg",	SETG,		BYTE },
	{ "setge",	SETGE,		BYTE },
	{ "setl",	SETL,		BYTE },
	{ "setna",	SETBE,		BYTE },
	{ "setnae",	SETB,		BYTE },
	{ "setnb",	SETAE,		BYTE },
	{ "setnbe",	SETA,		BYTE },
	{ "setne",	SETNE,		BYTE },
	{ "setng",	SETLE,		BYTE },
	{ "setnge",	SETL,		BYTE },
	{ "setnl",	SETGE,		BYTE },
	{ "setnle",	SETG,		BYTE },
	{ "setno",	SETNO,		BYTE },
	{ "setnp",	SETNP,		BYTE },
	{ "setns",	SETNS,		BYTE },
	{ "seto",	SETO,		BYTE },
	{ "setp",	SETP,		BYTE },
	{ "sets",	SETS,		BYTE },
	{ "setz",	SETE,		BYTE },
	{ "sgdt",	SGDT,		WORD },
	{ "shlb",	SHL,		BYTE },
	{ "shldl",	SHLD,		WORD },
	{ "shll",	SHL,		WORD },
	{ "shlw",	SHL,		OWORD },
	{ "shrb",	SHR,		BYTE },
	{ "shrdl",	SHRD,		WORD },
	{ "shrl",	SHR,		WORD },
	{ "shrw",	SHR,		OWORD },
	{ "sidt",	SIDT,		WORD },
	{ "sldt",	SLDT,		WORD },
	{ "smsw",	SMSW,		WORD },
	{ "stc",	STC,		WORD },
	{ "std",	STD,		WORD },
	{ "sti",	STI,		WORD },
	{ "stosb",	STOS,		BYTE },
	{ "stosl",	STOS,		WORD },
	{ "stosw",	STOS,		OWORD },
	{ "str",	STR,		WORD },
	{ "subb",	SUB,		BYTE },
	{ "subl",	SUB,		WORD },
	{ "subw",	SUB,		OWORD },
	{ "testb",	TEST,		BYTE },
	{ "testl",	TEST,		WORD },
	{ "testw",	TEST,		OWORD },
	{ "verr",	VERR,		WORD },
	{ "verw",	VERW,		WORD },
	{ "wait",	WAIT,		WORD },
	{ "wbinvd",	WBINVD,		WORD },
	{ "xadd",	XADD,		WORD },
	{ "xchgb",	XCHG,		BYTE },
	{ "xchgl",	XCHG,		WORD },
	{ "xchgw",	XCHG,		OWORD },
	{ "xlat",	XLAT,		WORD },
	{ "xorb",	XOR,		BYTE },
	{ "xorl",	XOR,		WORD },
	{ "xorw",	XOR,		OWORD },
};

void gnu_parse_init(char *file)
/* Prepare parsing of an GNU assembly file. */
{
	tok_init(file, '#');
}

static void zap(void)
/* An error, zap the rest of the line. */
{
	token_t *t;

	while ((t= get_token(0))->type != T_EOF && t->symbol != ';'
			&& t->type != T_COMMENT)
		skip_token(1);
}

/* same as in ACK */
static int zap_unknown(asm86_t *a)
/* An error, zap the rest of the line. */
{
	token_t *t;
#define MAX_ASTR	4096
	char astr[MAX_ASTR];
	unsigned astr_len = 0;

	astr[astr_len++] = '\t';
	while ((t= get_token(0))->type != T_EOF && t->symbol != ';'
			&& t->type != T_COMMENT) {
		switch(t->type) {
			case T_CHAR:
				astr[astr_len++] = t->symbol;
				break;
			case T_WORD:
			case T_STRING:
				strncpy(astr + astr_len, t->name, t->len);
				astr_len += t->len;
				break;

		}
		skip_token(1);
	}
	astr[astr_len++] = '\0';

	a->raw_string = malloc(astr_len);
	if (!a->raw_string)
		return -1;

	strcpy(a->raw_string, astr);

	return 0;
}

static mnemonic_t *search_mnem(char *name)
/* Binary search for a mnemonic.  (That's why the table is sorted.) */
{
	int low, mid, high;
	int cmp;
	mnemonic_t *m;
	char name_buf[64];
	int brk = 0;

try_long:
	low= 0;
	high= arraysize(mnemtab)-1;
	while (low <= high) {
		mid= (low + high) / 2;
		m= &mnemtab[mid];

		if ((cmp= strcmp(name, m->name)) == 0) return m;

		if (cmp < 0) high= mid-1; else low= mid+1;
	}

	/*
	 * in gnu the modifier 'l' is usually omitted, however we need the
	 * information about the arguments length. Therefore we try if we know
	 * such instruction. It covers most of the cases of unknown instructions
	 */
	if (!brk) {
		int len = strlen(name);
		strcpy(name_buf, name);
		name_buf[len] = 'l';
		name_buf[len + 1] = '\0';
		name = name_buf;
		brk = 1;
		goto try_long;
	}

	return nil;
}

static expression_t *gnu_get_C_expression(int *pn)
/* Read a "C-like" expression.  Note that we don't worry about precedence,
 * the expression is printed later like it is read.  If the target language
 * does not have all the operators (like ~) then this has to be repaired by
 * changing the source file.  (No problem, you still have one source file
 * to maintain, not two.)
 */
{
	expression_t *e, *a1, *a2;
	token_t *t;

	if ((t= get_token(*pn))->symbol == '(') {
		/* ( expr ): grouping. */
		(*pn)++;
		if ((a1= gnu_get_C_expression(pn)) == nil) return nil;
		if (get_token(*pn)->symbol != ')') {
			parse_err(1, t, "missing )\n");
			del_expr(a1);
			return nil;
		}
		(*pn)++;
		e= new_expr();
		e->operator= '[';
		e->middle= a1;
	} else
	if (t->type == T_WORD || t->type == T_STRING) {
		/* Label, number, or string. */
		e= new_expr();
		e->operator= t->type == T_WORD ? 'W' : 'S';
		e->name= allocate(nil, (t->len+1) * sizeof(e->name[0]));
		memcpy(e->name, t->name , t->len+1);
		e->len= t->len;
		(*pn)++;
	} else
	if (t->symbol == '+' || t->symbol == '-' || t->symbol == '~') {
		/* Unary operator. */
		(*pn)++;
		if ((a1= gnu_get_C_expression(pn)) == nil) return nil;
		e= new_expr();
		e->operator= t->symbol;
		e->middle= a1;
	} else {
		parse_err(1, t, "expression syntax error\n");
		return nil;
	}

	switch ((t= get_token(*pn))->symbol) {
	case '%':
	case '+':
	case '-':
	case '*':
	case '/':
	case '&':
	case '|':
	case '^':
	case S_LEFTSHIFT:
	case S_RIGHTSHIFT:
		(*pn)++;
		a1= e;
		if ((a2= gnu_get_C_expression(pn)) == nil) {
			del_expr(a1);
			return nil;
		}
		e= new_expr();
		e->operator= t->symbol;
		e->left= a1;
		e->right= a2;
	}
	return e;
}

static expression_t *gnu_get_operand(asm86_t * a, int *pn, int deref)
/* Get something like: $immed, memory, offset(%base,%index,scale), or simpler. */
{
	expression_t *e, *offset, *base, *index;
	token_t *t;
	int c;

	if (get_token(*pn)->symbol == '$') {
		/* An immediate value. */
		(*pn)++;
		return gnu_get_C_expression(pn);
	}

	if (get_token(*pn)->symbol == '*') {
		(*pn)++;
		deref = 1;
#if 0
		if ((offset= gnu_get_operand(a, pn, deref)) == nil) return nil;
#if 0
		e= new_expr();
		e->operator= '(';
		e->middle= offset;
		return e;
#endif
		return offset;
#endif
	}

	if ((get_token(*pn)->symbol == '%')
		&& (t= get_token(*pn + 1))->type == T_WORD
		&& isregister(t->name)
	) {
		/* A register operand. */
		(*pn)+= 2;
		e= new_expr();
		e->operator= 'W';
		e->name= copystr(t->name);
		return e;
	}

	/* Offset? */
	if (get_token(*pn)->symbol != '('
				|| get_token(*pn + 1)->symbol != '%') {
		/* There is an offset. */
		if ((offset= gnu_get_C_expression(pn)) == nil) return nil;
	} else {
		/* No offset. */
		offset= nil;
	}

	/* (%base,%index,scale) ? */
	base= index= nil;
	if (get_token(*pn)->symbol == '(') {
		(*pn)++;

		/* %base ? */
		if (get_token(*pn)->symbol == '%'
			&& (t= get_token(*pn + 1))->type == T_WORD
			&& isregister(t->name)
		) {
			/* A base register expression. */
			base= new_expr();
			base->operator= 'B';
			base->name= copystr(t->name);
			(*pn)+= 2;
		}

		if (get_token(*pn)->symbol == ',') (*pn)++;

		/* %index ? */
		if (get_token(*pn)->symbol == '%'
			&& (t= get_token(*pn + 1))->type == T_WORD
			&& isregister(t->name)
		) {
			/* A index register expression. */
			index= new_expr();
			index->operator= '1';		/* for now */
			index->name= copystr(t->name);
			(*pn)+= 2;
		}

		if (get_token(*pn)->symbol == ',') (*pn)++;

		/* scale ? */
		if ((base != nil || index != nil)
			&& (t= get_token(*pn))->type == T_WORD
			&& strchr("1248", t->name[0]) != nil
			&& t->name[1] == 0
		) {
			if (index == nil) {
				/* Base is really an index register. */
				index= base;
				base= nil;
			}
			index->operator= t->name[0];
			(*pn)++;
		}

		if (get_token(*pn)->symbol == ')') {
			/* Ending paren. */
			(*pn)++;
		} else {
			/* Alas. */
			parse_err(1, t, "operand syntax error\n");
			del_expr(offset);
			del_expr(base);
			del_expr(index);
			return nil;
		}
	}

	if (base == nil && index == nil) {
		if (deref) {
			/* Return a lone offset as (offset). */
			e= new_expr();
			e->operator= '(';
			e->middle= offset;
		} else {
			/* Return a lone offset as is. */
			e= offset;
		}
	} else {
		e= new_expr();
		e->operator= 'O';
		e->left= offset;

		e->middle= base;
		e->right= index;
	}
	return e;
}

static expression_t *gnu_get_oplist(asm86_t * a, int *pn, int deref)
/* Get a comma (or colon for jmpf and callf) separated list of instruction
 * operands.
 */
{
	expression_t *e, *o1, *o2;
	token_t *t;
	int sreg;

	if ((e= gnu_get_operand(a, pn, deref)) == nil) return nil;

	t = get_token(*pn);

	if (t->symbol == ':' && IS_REGSEG(sreg = isregister(e->name))) {
		a->seg = segreg2seg(sreg);
		del_expr(e);
		(*pn)++;
		e = gnu_get_oplist(a, pn, deref);
	}
	else if (t->symbol == ',' || t->symbol == ':') {
		o1= e;
		(*pn)++;
		if ((o2= gnu_get_oplist(a, pn, deref)) == nil) {
			del_expr(o1);
			return nil;
		}
		e= new_expr();
		e->operator= ',';
		e->left= o2;
		e->right= o1;
	}
	return e;
}


static asm86_t *gnu_get_statement(void)
/* Get a pseudo op or machine instruction with arguments. */
{
	token_t *t= get_token(0);
	token_t *tn;
	asm86_t *a;
	mnemonic_t *m;
	int n;
	int prefix_seen;
	int deref;

	assert(t->type == T_WORD);

	a= new_asm86();

	/* Process instruction prefixes. */
	for (prefix_seen= 0;; prefix_seen= 1) {
		if (strcmp(t->name, "rep") == 0
			|| strcmp(t->name, "repe") == 0
			|| strcmp(t->name, "repne") == 0
			|| strcmp(t->name, "repz") == 0
			|| strcmp(t->name, "repnz") == 0
		) {
			if (a->rep != ONCE) {
				parse_err(1, t,
					"can't have more than one rep\n");
			}
			switch (t->name[3]) {
			case 0:		a->rep= REP;	break;
			case 'e':
			case 'z':	a->rep= REPE;	break;
			case 'n':	a->rep= REPNE;	break;
			}
		} else
		if (!prefix_seen) {
			/* No prefix here, get out! */
			break;
		} else {
			/* No more prefixes, next must be an instruction. */
			if (t->type != T_WORD
				|| (m= search_mnem(t->name)) == nil
				|| m->optype == PSEUDO
			) {
				parse_err(1, t,
		"machine instruction expected after instruction prefix\n");
				del_asm86(a);
				return nil;
			}
			break;
		}

		/* Skip the prefix and extra newlines. */
		do {
			skip_token(1);
		} while ((t= get_token(0))->symbol == ';');
	}

	/* All the readahead being done upsets the line counter. */
	a->line= t->line;

	/* Read a machine instruction or pseudo op. */
	if ((m= search_mnem(t->name)) == nil) {
		/* we assume that unknown stuff is part of unresolved macro */
		a->opcode = UNKNOWN;
		if (zap_unknown(a)) {
			parse_err(1, t, "unknown instruction '%s'\n", t->name);
			del_asm86(a);
			return nil;
		}
		return a;
	}
	a->opcode= m->opcode;
	a->optype= m->optype;
	a->oaz= 0;
	if (a->optype == OWORD) {
		a->oaz|= OPZ;
		a->optype= WORD;
	}
	else if (a->optype == JUMP16) {
		a->oaz|= OPZ;
		a->optype= JUMP;
	}

	switch (a->opcode) {
	case IN:
	case OUT:
	case INT:
		deref= 0;
		break;
	default:
		deref= (a->optype >= BYTE);
	}
	n= 1;
	if (get_token(1)->type != T_COMMENT && get_token(1)->symbol != ';'
			&& (a->args= gnu_get_oplist(a, &n, deref)) == nil) {
		del_asm86(a);
		return nil;
	}
	tn = get_token(n);
	if (tn->type == T_COMMENT) {
		a->raw_string = malloc(tn->len + 1);
		if (!a->raw_string)
			return NULL;

		strcpy(a->raw_string, tn->name);
	} else
	if (get_token(n)->symbol != ';') {
		parse_err(1, t, "garbage at end of instruction\n");
		del_asm86(a);
		return nil;
	}
	if (!is_pseudo(a->opcode)) {
		/* GNU operand order is the other way around. */
		expression_t *e, *t;

		e= a->args;
		while (e != nil && e->operator == ',') {
			t= e->right; e->right= e->left; e->left= t;
			e= e->left;
		}
	}
	switch (a->opcode) {
	case DOT_ALIGN:
		/* Delete two argument .align, because ACK can't do it.
		 */
		if (a->args == nil || a->args->operator != 'W') {
			del_asm86(a);
			return nil;
		}
		if (a->args != nil && a->args->operator == 'W'
			&& isanumber(a->args->name)
		) {
			unsigned n;
			char num[sizeof(int) * CHAR_BIT / 3 + 1];

			n= strtoul(a->args->name, nil, 0);
			sprintf(num, "%u", n);
			deallocate(a->args->name);
			a->args->name= copystr(num);
		}
		break;
	case DOT_DEFINE:
	case DOT_EXTERN:
		syms_add_global_csl(a->args);
		break;
	case DOT_COMM:
		syms_add_global(a->args->left->name);
		break;
	case DOT_LCOMM:
		syms_add(a->args->left->name);
		break;
	case JMPF:
	case CALLF:
		/*FALL THROUGH*/
	case JMP:
	case CALL:
		break;
	default:;
	}
	skip_token(n+1);
	return a;
}


asm86_t *gnu_get_instruction(void)
{
	asm86_t *a= nil;
	expression_t *e;
	token_t *t;

	while ((t= get_token(0))->symbol == ';' || t->symbol == '/') {
		zap();		/* if a comment started by a '/' */
		skip_token(1);
	}

	if (t->type == T_EOF) return nil;

	if (t->type == T_COMMENT || t->type == T_C_PREPROCESSOR) {

		a = new_asm86();
		if (t->type == T_COMMENT)
			a->opcode = COMMENT;
		else
			a->opcode = C_PREPROCESSOR;

		a->raw_string = malloc(t->len + 1);
		if (!a->raw_string)
			return NULL;

		strcpy(a->raw_string, t->name);
		skip_token(1);
		return a;
	}

	if (t->symbol == '#') {
		/* Preprocessor line and file change. */

		if ((t= get_token(1))->type != T_WORD || !isanumber(t->name)
			|| get_token(2)->type != T_STRING
		) {
			parse_err(1, t, "file not preprocessed?\n");
			zap();
		} else {
			set_file(get_token(2)->name,
				strtol(get_token(1)->name, nil, 0) - 1);

			/* GNU CPP adds extra cruft, simply zap the line. */
			zap();
		}
		a= gnu_get_instruction();
	} else
	if (t->type == T_WORD && get_token(1)->symbol == ':') {
		/* A label definition. */

		a= new_asm86();
		a->line= t->line;
		a->opcode= DOT_LABEL;
		a->optype= PSEUDO;
		a->args= e= new_expr();
		e->operator= ':';
		e->name= copystr(t->name);
		syms_add(t->name);
		skip_token(2);
	} else
	if (t->type == T_WORD && get_token(1)->symbol == '=') {
		int n= 2;

		if ((e= gnu_get_C_expression(&n)) == nil) {
			zap();
			a= gnu_get_instruction();
		} else
		if (get_token(n)->type != T_COMMENT && get_token(n)->symbol != ';') {
			parse_err(1, t, "garbage after assignment\n");
			zap();
			a= gnu_get_instruction();
		} else {
			a= new_asm86();
			if (get_token(n)->type == T_COMMENT) {
				token_t *c = get_token(n);

				a->raw_string = malloc(c->len + 1);
				if (!a->raw_string)
					return NULL;

				strcpy(a->raw_string, c->name);
			}
			a->line= t->line;
			a->opcode= DOT_EQU;
			a->optype= PSEUDO;
			a->args= new_expr();
			a->args->operator= '=';
			a->args->name= copystr(t->name);
			syms_add(t->name);
			a->args->middle= e;
			skip_token(n+1);
		}
	} else
	if (t->type == T_WORD) {
		if ((a= gnu_get_statement()) == nil) {
			zap();
			a= gnu_get_instruction();
		}
	} else {
		parse_err(1, t, "syntax error\n");
		zap();
		a= gnu_get_instruction();
	}
	return a;
}