minix/commands/asmconv/parse_bas.c
2010-05-12 16:28:54 +00:00

940 lines
22 KiB
C

/* parse_bas.c - parse BCC AS assembly
 * Author: Kees J. Bot
 * 13 Nov 1994
 */
#define nil 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "asmconv.h"
#include "token.h"
#include "asm86.h"
#include "languages.h"
/* One row of the BAS mnemonic translation table. */
struct mnemonic {
    char *name;         /* Mnemonic as it is spelled in the BAS source. */
    opcode_t opcode;    /* Opcode the mnemonic is translated to. */
    optype_t optype;    /* Default operand type that goes with it. */
};
typedef struct mnemonic mnemonic_t;
/* BAS mnemonics translation table.
 *
 * search_mnem() does a binary search on this array using strcmp(), so the
 * entries MUST stay sorted on name in strcmp() order.  Several spellings
 * may map to one opcode (e.g. "jz" and "je" both give JE).
 *
 * "scasb" and "setle" are present so that SCAS has its byte form like the
 * other string instructions, and SETLE can be reached under its own name
 * and not only through the alias "setng".
 */
static mnemonic_t mnemtab[] = { /* This array is sorted. */
    { ".align", DOT_ALIGN, PSEUDO },
    { ".ascii", DOT_ASCII, PSEUDO },
    { ".asciz", DOT_ASCIZ, PSEUDO },
    { ".assert", DOT_ASSERT, PSEUDO },
    { ".base", DOT_BASE, PSEUDO },
    { ".blkb", DOT_SPACE, PSEUDO },
    { ".bss", DOT_BSS, PSEUDO },
    { ".byte", DOT_DATA1, PSEUDO },
    { ".comm", DOT_COMM, PSEUDO },
    { ".data", DOT_DATA, PSEUDO },
    { ".define", DOT_DEFINE, PSEUDO },
    { ".end", DOT_END, PSEUDO },
    { ".even", DOT_ALIGN, PSEUDO },
    { ".extern", DOT_EXTERN, PSEUDO },
    { ".file", DOT_FILE, PSEUDO },
    { ".globl", DOT_DEFINE, PSEUDO },
    { ".lcomm", DOT_LCOMM, PSEUDO },
    { ".line", DOT_LINE, PSEUDO },
    { ".list", DOT_LIST, PSEUDO },
    { ".long", DOT_DATA4, PSEUDO },
    { ".nolist", DOT_NOLIST, PSEUDO },
    { ".rom", DOT_ROM, PSEUDO },
    { ".space", DOT_SPACE, PSEUDO },
    { ".symb", DOT_SYMB, PSEUDO },
    { ".text", DOT_TEXT, PSEUDO },
    { ".use16", DOT_USE16, PSEUDO },
    { ".use32", DOT_USE32, PSEUDO },
    { ".word", DOT_DATA2, PSEUDO },
    { ".zerob", DOT_SPACE, PSEUDO },
    { ".zerow", DOT_SPACE, PSEUDO },
    { "aaa", AAA, WORD },
    { "aad", AAD, WORD },
    { "aam", AAM, WORD },
    { "aas", AAS, WORD },
    { "adc", ADC, WORD },
    { "add", ADD, WORD },
    { "and", AND, WORD },
    { "arpl", ARPL, WORD },
    { "bc", JB, JUMP },
    { "beq", JE, JUMP },
    { "bge", JGE, JUMP },
    { "bgt", JG, JUMP },
    { "bhi", JA, JUMP },
    { "bhis", JAE, JUMP },
    { "ble", JLE, JUMP },
    { "blo", JB, JUMP },
    { "blos", JBE, JUMP },
    { "blt", JL, JUMP },
    { "bnc", JAE, JUMP },
    { "bne", JNE, JUMP },
    { "bound", BOUND, WORD },
    { "br", JMP, JUMP },
    { "bsf", BSF, WORD },
    { "bsr", BSR, WORD },
    { "bswap", BSWAP, WORD },
    { "bt", BT, WORD },
    { "btc", BTC, WORD },
    { "btr", BTR, WORD },
    { "bts", BTS, WORD },
    { "bz", JE, JUMP },
    { "call", CALL, JUMP },
    { "callf", CALLF, JUMP },
    { "cbw", CBW, WORD },
    { "cdq", CWD, WORD },
    { "clc", CLC, WORD },
    { "cld", CLD, WORD },
    { "cli", CLI, WORD },
    { "clts", CLTS, WORD },
    { "cmc", CMC, WORD },
    { "cmp", CMP, WORD },
    { "cmps", CMPS, WORD },
    { "cmpsb", CMPS, BYTE },
    { "cmpxchg", CMPXCHG, WORD },
    { "cwd", CWD, WORD },
    { "cwde", CBW, WORD },
    { "daa", DAA, WORD },
    { "das", DAS, WORD },
    { "dd", DOT_DATA4, PSEUDO },
    { "dec", DEC, WORD },
    { "div", DIV, WORD },
    { "enter", ENTER, WORD },
    { "export", DOT_DEFINE, PSEUDO },
    { "f2xm1", F2XM1, WORD },
    { "fabs", FABS, WORD },
    { "fadd", FADD, WORD },
    { "faddd", FADDD, WORD },
    { "faddp", FADDP, WORD },
    { "fadds", FADDS, WORD },
    { "fbld", FBLD, WORD },
    { "fbstp", FBSTP, WORD },
    { "fchs", FCHS, WORD },
    { "fclex", FCLEX, WORD },
    { "fcomd", FCOMD, WORD },
    { "fcompd", FCOMPD, WORD },
    { "fcompp", FCOMPP, WORD },
    { "fcomps", FCOMPS, WORD },
    { "fcoms", FCOMS, WORD },
    { "fcos", FCOS, WORD },
    { "fdecstp", FDECSTP, WORD },
    { "fdivd", FDIVD, WORD },
    { "fdivp", FDIVP, WORD },
    { "fdivrd", FDIVRD, WORD },
    { "fdivrp", FDIVRP, WORD },
    { "fdivrs", FDIVRS, WORD },
    { "fdivs", FDIVS, WORD },
    { "ffree", FFREE, WORD },
    { "fiaddl", FIADDL, WORD },
    { "fiadds", FIADDS, WORD },
    { "ficom", FICOM, WORD },
    { "ficomp", FICOMP, WORD },
    { "fidivl", FIDIVL, WORD },
    { "fidivrl", FIDIVRL, WORD },
    { "fidivrs", FIDIVRS, WORD },
    { "fidivs", FIDIVS, WORD },
    { "fildl", FILDL, WORD },
    { "fildq", FILDQ, WORD },
    { "filds", FILDS, WORD },
    { "fimull", FIMULL, WORD },
    { "fimuls", FIMULS, WORD },
    { "fincstp", FINCSTP, WORD },
    { "finit", FINIT, WORD },
    { "fistl", FISTL, WORD },
    { "fistp", FISTP, WORD },
    { "fists", FISTS, WORD },
    { "fisubl", FISUBL, WORD },
    { "fisubrl", FISUBRL, WORD },
    { "fisubrs", FISUBRS, WORD },
    { "fisubs", FISUBS, WORD },
    { "fld1", FLD1, WORD },
    { "fldcw", FLDCW, WORD },
    { "fldd", FLDD, WORD },
    { "fldenv", FLDENV, WORD },
    { "fldl2e", FLDL2E, WORD },
    { "fldl2t", FLDL2T, WORD },
    { "fldlg2", FLDLG2, WORD },
    { "fldln2", FLDLN2, WORD },
    { "fldpi", FLDPI, WORD },
    { "flds", FLDS, WORD },
    { "fldx", FLDX, WORD },
    { "fldz", FLDZ, WORD },
    { "fmuld", FMULD, WORD },
    { "fmulp", FMULP, WORD },
    { "fmuls", FMULS, WORD },
    { "fnop", FNOP, WORD },
    { "fpatan", FPATAN, WORD },
    { "fprem", FPREM, WORD },
    { "fprem1", FPREM1, WORD },
    { "fptan", FPTAN, WORD },
    { "frndint", FRNDINT, WORD },
    { "frstor", FRSTOR, WORD },
    { "fsave", FSAVE, WORD },
    { "fscale", FSCALE, WORD },
    { "fsin", FSIN, WORD },
    { "fsincos", FSINCOS, WORD },
    { "fsqrt", FSQRT, WORD },
    { "fstcw", FSTCW, WORD },
    { "fstd", FSTD, WORD },
    { "fstenv", FSTENV, WORD },
    { "fstpd", FSTPD, WORD },
    { "fstps", FSTPS, WORD },
    { "fstpx", FSTPX, WORD },
    { "fsts", FSTS, WORD },
    { "fstsw", FSTSW, WORD },
    { "fsubd", FSUBD, WORD },
    { "fsubp", FSUBP, WORD },
    { "fsubpr", FSUBPR, WORD },
    { "fsubrd", FSUBRD, WORD },
    { "fsubrs", FSUBRS, WORD },
    { "fsubs", FSUBS, WORD },
    { "ftst", FTST, WORD },
    { "fucom", FUCOM, WORD },
    { "fucomp", FUCOMP, WORD },
    { "fucompp", FUCOMPP, WORD },
    { "fxam", FXAM, WORD },
    { "fxch", FXCH, WORD },
    { "fxtract", FXTRACT, WORD },
    { "fyl2x", FYL2X, WORD },
    { "fyl2xp1", FYL2XP1, WORD },
    { "hlt", HLT, WORD },
    { "idiv", IDIV, WORD },
    { "imul", IMUL, WORD },
    { "in", IN, WORD },
    { "inb", IN, BYTE },
    { "inc", INC, WORD },
    { "ins", INS, WORD },
    { "insb", INS, BYTE },
    { "int", INT, WORD },
    { "into", INTO, JUMP },
    { "invd", INVD, WORD },
    { "invlpg", INVLPG, WORD },
    { "iret", IRET, JUMP },
    { "iretd", IRETD, JUMP },
    { "j", JMP, JUMP },
    { "ja", JA, JUMP },
    { "jae", JAE, JUMP },
    { "jb", JB, JUMP },
    { "jbe", JBE, JUMP },
    { "jc", JB, JUMP },
    { "jcxz", JCXZ, JUMP },
    { "je", JE, JUMP },
    { "jecxz", JCXZ, JUMP },
    { "jeq", JE, JUMP },
    { "jg", JG, JUMP },
    { "jge", JGE, JUMP },
    { "jgt", JG, JUMP },
    { "jhi", JA, JUMP },
    { "jhis", JAE, JUMP },
    { "jl", JL, JUMP },
    { "jle", JLE, JUMP },
    { "jlo", JB, JUMP },
    { "jlos", JBE, JUMP },
    { "jlt", JL, JUMP },
    { "jmp", JMP, JUMP },
    { "jmpf", JMPF, JUMP },
    { "jna", JBE, JUMP },
    { "jnae", JB, JUMP },
    { "jnb", JAE, JUMP },
    { "jnbe", JA, JUMP },
    { "jnc", JAE, JUMP },
    { "jne", JNE, JUMP },
    { "jng", JLE, JUMP },
    { "jnge", JL, JUMP },
    { "jnl", JGE, JUMP },
    { "jnle", JG, JUMP },
    { "jno", JNO, JUMP },
    { "jnp", JNP, JUMP },
    { "jns", JNS, JUMP },
    { "jnz", JNE, JUMP },
    { "jo", JO, JUMP },
    { "jp", JP, JUMP },
    { "js", JS, JUMP },
    { "jz", JE, JUMP },
    { "lahf", LAHF, WORD },
    { "lar", LAR, WORD },
    { "lds", LDS, WORD },
    { "lea", LEA, WORD },
    { "leave", LEAVE, WORD },
    { "les", LES, WORD },
    { "lfs", LFS, WORD },
    { "lgdt", LGDT, WORD },
    { "lgs", LGS, WORD },
    { "lidt", LIDT, WORD },
    { "lldt", LLDT, WORD },
    { "lmsw", LMSW, WORD },
    { "lock", LOCK, WORD },
    { "lods", LODS, WORD },
    { "lodsb", LODS, BYTE },
    { "loop", LOOP, JUMP },
    { "loope", LOOPE, JUMP },
    { "loopne", LOOPNE, JUMP },
    { "loopnz", LOOPNE, JUMP },
    { "loopz", LOOPE, JUMP },
    { "lsl", LSL, WORD },
    { "lss", LSS, WORD },
    { "ltr", LTR, WORD },
    { "mov", MOV, WORD },
    { "movs", MOVS, WORD },
    { "movsb", MOVS, BYTE },
    { "movsx", MOVSX, WORD },
    { "movzx", MOVZX, WORD },
    { "mul", MUL, WORD },
    { "neg", NEG, WORD },
    { "nop", NOP, WORD },
    { "not", NOT, WORD },
    { "or", OR, WORD },
    { "out", OUT, WORD },
    { "outb", OUT, BYTE },
    { "outs", OUTS, WORD },
    { "outsb", OUTS, BYTE },
    { "pop", POP, WORD },
    { "popa", POPA, WORD },
    { "popad", POPA, WORD },
    { "popf", POPF, WORD },
    { "popfd", POPF, WORD },
    { "push", PUSH, WORD },
    { "pusha", PUSHA, WORD },
    { "pushad", PUSHA, WORD },
    { "pushf", PUSHF, WORD },
    { "pushfd", PUSHF, WORD },
    { "rcl", RCL, WORD },
    { "rcr", RCR, WORD },
    { "ret", RET, JUMP },
    { "retf", RETF, JUMP },
    { "rol", ROL, WORD },
    { "ror", ROR, WORD },
    { "sahf", SAHF, WORD },
    { "sal", SAL, WORD },
    { "sar", SAR, WORD },
    { "sbb", SBB, WORD },
    { "scas", SCAS, WORD },
    { "scasb", SCAS, BYTE },
    { "seta", SETA, BYTE },
    { "setae", SETAE, BYTE },
    { "setb", SETB, BYTE },
    { "setbe", SETBE, BYTE },
    { "sete", SETE, BYTE },
    { "setg", SETG, BYTE },
    { "setge", SETGE, BYTE },
    { "setl", SETL, BYTE },
    { "setle", SETLE, BYTE },
    { "setna", SETBE, BYTE },
    { "setnae", SETB, BYTE },
    { "setnb", SETAE, BYTE },
    { "setnbe", SETA, BYTE },
    { "setne", SETNE, BYTE },
    { "setng", SETLE, BYTE },
    { "setnge", SETL, BYTE },
    { "setnl", SETGE, BYTE },
    { "setnle", SETG, BYTE },
    { "setno", SETNO, BYTE },
    { "setnp", SETNP, BYTE },
    { "setns", SETNS, BYTE },
    { "seto", SETO, BYTE },
    { "setp", SETP, BYTE },
    { "sets", SETS, BYTE },
    { "setz", SETE, BYTE },
    { "sgdt", SGDT, WORD },
    { "shl", SHL, WORD },
    { "shld", SHLD, WORD },
    { "shr", SHR, WORD },
    { "shrd", SHRD, WORD },
    { "sidt", SIDT, WORD },
    { "sldt", SLDT, WORD },
    { "smsw", SMSW, WORD },
    { "stc", STC, WORD },
    { "std", STD, WORD },
    { "sti", STI, WORD },
    { "stos", STOS, WORD },
    { "stosb", STOS, BYTE },
    { "str", STR, WORD },
    { "sub", SUB, WORD },
    { "test", TEST, WORD },
    { "verr", VERR, WORD },
    { "verw", VERW, WORD },
    { "wait", WAIT, WORD },
    { "wbinvd", WBINVD, WORD },
    { "xadd", XADD, WORD },
    { "xchg", XCHG, WORD },
    { "xlat", XLAT, WORD },
    { "xor", XOR, WORD },
};
void bas_parse_init(char *file)
/* Prepare parsing of a BAS assembly file: start the tokenizer on 'file'.
 * The '!' is handed to tok_init() as its second argument (presumably the
 * comment character of this assembly dialect -- TODO confirm in tok_init).
 */
{
tok_init(file, '!');
}
static void zap(void)
/* Error recovery: throw tokens away until the next token is either a ';'
 * or end of file.  The ';' or EOF token itself is left unread.
 */
{
    for (;;) {
        token_t *tok= get_token(0);

        if (tok->type == T_EOF || tok->symbol == ';') break;
        skip_token(1);
    }
}
static mnemonic_t *search_mnem(char *name)
/* Look 'name' up in mnemtab[] by binary search (the table is kept sorted
 * for this purpose).  Returns the matching entry, or nil if there is none.
 */
{
    int first= 0;
    int last= arraysize(mnemtab) - 1;

    while (first <= last) {
        int probe= (first + last) / 2;
        mnemonic_t *entry= &mnemtab[probe];
        int order= strcmp(name, entry->name);

        if (order == 0) return entry;

        if (order > 0) {
            /* Name sorts after the probed entry. */
            first= probe + 1;
        } else {
            /* Name sorts before the probed entry. */
            last= probe - 1;
        }
    }
    return nil;
}
static expression_t *bas_get_C_expression(int *pn)
/* Parse a "C-like" expression starting at lookahead token *pn and advance
 * *pn past what was consumed.  Operator precedence is deliberately ignored:
 * the tree simply mirrors the order in which things were read, and it is
 * printed back the same way.  Operators the target language lacks (like ~)
 * have to be dealt with by editing the source file.
 *
 * Returns the expression tree, or nil after reporting a syntax error.
 */
{
    expression_t *expr, *inner, *lhs, *rhs;
    token_t *tok;
    int is_binary;

    tok= get_token(*pn);

    if (tok->symbol == '(') {
        /* ( expr ): a parenthesized group, stored as a '[' node. */
        (*pn)++;
        inner= bas_get_C_expression(pn);
        if (inner == nil) return nil;
        if (get_token(*pn)->symbol != ')') {
            parse_err(1, tok, "missing )\n");
            del_expr(inner);
            return nil;
        }
        (*pn)++;
        expr= new_expr();
        expr->operator= '[';
        expr->middle= inner;
    } else if (tok->type == T_WORD || tok->type == T_STRING) {
        /* A word (label or number) or a string: copy its text. */
        expr= new_expr();
        if (tok->type == T_WORD) {
            expr->operator= 'W';
        } else {
            expr->operator= 'S';
        }
        expr->name= allocate(nil, (tok->len+1) * sizeof(expr->name[0]));
        memcpy(expr->name, tok->name, tok->len+1);
        expr->len= tok->len;
        (*pn)++;
    } else if (tok->symbol == '+' || tok->symbol == '-'
                        || tok->symbol == '~') {
        /* Unary operator applied to everything that follows. */
        (*pn)++;
        inner= bas_get_C_expression(pn);
        if (inner == nil) return nil;
        expr= new_expr();
        expr->operator= tok->symbol;
        expr->middle= inner;
    } else if (tok->symbol == '$' && get_token(*pn + 1)->type == T_WORD) {
        /* $digits is a hexadecimal number: store it as "0x" + digits. */
        tok= get_token(*pn + 1);
        expr= new_expr();
        expr->operator= 'W';
        expr->name= allocate(nil, (tok->len+3) * sizeof(expr->name[0]));
        strcpy(expr->name, "0x");
        memcpy(expr->name+2, tok->name, tok->len+1);
        expr->len= tok->len+2;
        (*pn)+= 2;
    } else {
        parse_err(1, tok, "expression syntax error\n");
        return nil;
    }

    /* Is the primary followed by a binary operator? */
    tok= get_token(*pn);
    is_binary= tok->symbol == '+' || tok->symbol == '-'
        || tok->symbol == '*' || tok->symbol == '/'
        || tok->symbol == '%' || tok->symbol == '&'
        || tok->symbol == '|' || tok->symbol == '^'
        || tok->symbol == S_LEFTSHIFT
        || tok->symbol == S_RIGHTSHIFT;
    if (!is_binary) return expr;

    /* Yes: what we have is the left side, the rest is the right side. */
    (*pn)++;
    lhs= expr;
    rhs= bas_get_C_expression(pn);
    if (rhs == nil) {
        del_expr(lhs);
        return nil;
    }
    expr= new_expr();
    expr->operator= tok->symbol;
    expr->left= lhs;
    expr->right= rhs;
    return expr;
}
/* We want to know the sizes of the first two operands.  bas_get_operand()
 * stores an operand type at optypes[op_idx] and bumps op_idx whenever it
 * sees a size prefix or a lone register; bas_get_statement() resets both
 * before parsing an operand list and inspects optypes[] afterwards.
 */
static optype_t optypes[2];
static int op_idx;
static expression_t *bas_get_operand(int *pn)
/* Get something like: [memory], offset[base+index*scale], or simpler. */
{
expression_t *e, *offset, *base, *index;
token_t *t;
int c;
optype_t optype;
/* Prefixed by 'byte', 'word' or 'dword'? */
if ((t= get_token(*pn))->type == T_WORD && (
strcmp(t->name, "byte") == 0
|| strcmp(t->name, "word") == 0
|| strcmp(t->name, "dword") == 0)
) {
switch (t->name[0]) {
case 'b': optype= BYTE; break;
case 'w': optype= use16() ? WORD : OWORD; break;
case 'd': optype= use32() ? WORD : OWORD; break;
}
if (op_idx < arraysize(optypes)) optypes[op_idx++]= optype;
(*pn)++;
/* It may even be "byte ptr"... */
if ((t= get_token(*pn))->type == T_WORD
&& strcmp(t->name, "ptr") == 0) {
(*pn)++;
}
}
/* Is it [memory]? */
if (get_token(*pn)->symbol == '['
&& ((t= get_token(*pn + 1))->type != T_WORD
|| !isregister(t->name))
) {
/* A memory dereference. */
(*pn)++;
if ((offset= bas_get_C_expression(pn)) == nil) return nil;
if (get_token(*pn)->symbol != ']') {
parse_err(1, t, "operand syntax error\n");
del_expr(offset);
return nil;
}
(*pn)++;
e= new_expr();
e->operator= '(';
e->middle= offset;
return e;
}
/* #something? *something? */
if ((c= get_token(*pn)->symbol) == '#' || c == '*') {
/* '#' and '*' are often used to introduce some constant. */
(*pn)++;
}
/* Offset? */
if (get_token(*pn)->symbol != '[') {
/* There is an offset. */
if ((offset= bas_get_C_expression(pn)) == nil) return nil;
} else {
/* No offset. */
offset= nil;
}
/* [base]? [base+? base-? */
c= 0;
if (get_token(*pn)->symbol == '['
&& (t= get_token(*pn + 1))->type == T_WORD
&& isregister(t->name)
&& ((c= get_token(*pn + 2)->symbol) == ']' || c=='+' || c=='-')
) {
/* A base register expression. */
base= new_expr();
base->operator= 'B';
base->name= copystr(t->name);
(*pn)+= c == ']' ? 3 : 2;
} else {
/* No base register expression. */
base= nil;
}
/* +offset]? -offset]? */
if (offset == nil
&& (c == '+' || c == '-')
&& (t= get_token(*pn + 1))->type == T_WORD
&& !isregister(t->name)
) {
(*pn)++;
if ((offset= bas_get_C_expression(pn)) == nil) return nil;
if (get_token(*pn)->symbol != ']') {
parse_err(1, t, "operand syntax error\n");
del_expr(offset);
del_expr(base);
return nil;
}
(*pn)++;
c= 0;
}
/* [index*scale]? +index*scale]? */
if (c == '+' || get_token(*pn)->symbol == '[') {
/* An index most likely. */
token_t *m= nil;
if (!( /* This must be true: */
(t= get_token(*pn + 1))->type == T_WORD
&& isregister(t->name)
&& (get_token(*pn + 2)->symbol == ']' || (
get_token(*pn + 2)->symbol == '*'
&& (m= get_token(*pn + 3))->type == T_WORD
&& strchr("1248", m->name[0]) != nil
&& m->name[1] == 0
&& get_token(*pn + 4)->symbol == ']'
))
)) {
/* Alas it isn't */
parse_err(1, t, "operand syntax error\n");
del_expr(offset);
del_expr(base);
return nil;
}
/* Found an index. */
index= new_expr();
index->operator= m == nil ? '1' : m->name[0];
index->name= copystr(t->name);
(*pn)+= (m == nil ? 3 : 5);
} else {
/* No index. */
index= nil;
}
if (base == nil && index == nil) {
/* Return a lone offset as is. */
e= offset;
/* Lone registers tell operand size. */
if (offset->operator == 'W' && isregister(offset->name)) {
switch (isregister(offset->name)) {
case 1: optype= BYTE; break;
case 2: optype= use16() ? WORD : OWORD; break;
case 4: optype= use32() ? WORD : OWORD; break;
}
if (op_idx < arraysize(optypes))
optypes[op_idx++]= optype;
}
} else {
e= new_expr();
e->operator= 'O';
e->left= offset;
e->middle= base;
e->right= index;
}
return e;
}
static expression_t *bas_get_oplist(int *pn)
/* Parse a list of instruction operands separated by commas (or by a colon,
 * as used by jmpf and callf), starting at lookahead token *pn.  A single
 * operand is returned as is; a longer list becomes a right-leaning chain of
 * ',' nodes.  Returns nil if any operand fails to parse.
 */
{
    expression_t *first, *rest, *list;
    token_t *sep;

    first= bas_get_operand(pn);
    if (first == nil) return nil;

    /* Without a separator this operand is all there is. */
    sep= get_token(*pn);
    if (sep->symbol != ',' && sep->symbol != ':') return first;

    (*pn)++;
    rest= bas_get_oplist(pn);
    if (rest == nil) {
        del_expr(first);
        return nil;
    }

    list= new_expr();
    list->operator= ',';
    list->left= first;
    list->right= rest;
    return list;
}
static asm86_t *bas_get_statement(void)
/* Get a pseudo op or machine instruction with arguments.
 *
 * The current token (get_token(0)) must be a T_WORD.  Instruction prefixes
 * (rep*, seg xx) are folded into the returned instruction.  On success the
 * statement and its terminating ';' have been skipped and a new asm86_t is
 * returned; on a syntax error the error is reported and nil is returned
 * with the rest of the line left for the caller to dispose of.
 */
{
    token_t *t= get_token(0);
    asm86_t *a;
    mnemonic_t *m;
    int n;
    int prefix_seen;

    assert(t->type == T_WORD);

    if (strcmp(t->name, ".sect") == 0) {
        /* .sect .text etc.  Accept only the known section names; the
         * name itself is then handled as the pseudo op below.
         */
        skip_token(1);
        t= get_token(0);
        if (t->type != T_WORD || (
            strcmp(t->name, ".text") != 0
            && strcmp(t->name, ".rom") != 0
            && strcmp(t->name, ".data") != 0
            && strcmp(t->name, ".bss") != 0
            && strcmp(t->name, ".end") != 0
        )) {
            parse_err(1, t, "weird section name to .sect\n");
            return nil;
        }
    }
    a= new_asm86();

    /* Process instruction prefixes.  After the first prefix 't' may be
     * any kind of token, so only look at t->name if it is a word.
     */
    for (prefix_seen= 0;; prefix_seen= 1) {
        if (t->type == T_WORD && (
            strcmp(t->name, "rep") == 0
            || strcmp(t->name, "repe") == 0
            || strcmp(t->name, "repne") == 0
            || strcmp(t->name, "repz") == 0
            || strcmp(t->name, "repnz") == 0
        )) {
            if (a->rep != ONCE) {
                parse_err(1, t,
                    "can't have more than one rep\n");
            }
            /* The fourth character tells the variants apart. */
            switch (t->name[3]) {
            case 0:     a->rep= REP;    break;
            case 'e':
            case 'z':   a->rep= REPE;   break;
            case 'n':   a->rep= REPNE;  break;
            }
        } else
        if (t->type == T_WORD
            && strcmp(t->name, "seg") == 0
            && get_token(1)->type == T_WORD) {
            if (a->seg != DEFSEG) {
                parse_err(1, t,
                "can't have more than one segment prefix\n");
            }
            /* Only the first letter of the segment name is used. */
            switch (get_token(1)->name[0]) {
            case 'c':   a->seg= CSEG;   break;
            case 'd':   a->seg= DSEG;   break;
            case 'e':   a->seg= ESEG;   break;
            case 'f':   a->seg= FSEG;   break;
            case 'g':   a->seg= GSEG;   break;
            case 's':   a->seg= SSEG;   break;
            }
            /* Skip "seg"; the loop tail skips the segment name. */
            skip_token(1);
        } else
        if (!prefix_seen) {
            /* No prefix here, get out! */
            break;
        } else {
            /* No more prefixes, next must be an instruction. */
            if (t->type != T_WORD
                || (m= search_mnem(t->name)) == nil
                || m->optype == PSEUDO
            ) {
                parse_err(1, t,
        "machine instruction expected after instruction prefix\n");
                del_asm86(a);
                return nil;
            }
            break;
        }

        /* Skip the prefix and extra newlines. */
        do {
            skip_token(1);
        } while ((t= get_token(0))->symbol == ';');
    }

    /* All the readahead being done upsets the line counter. */
    a->line= t->line;

    /* Read a machine instruction or pseudo op. */
    if ((m= search_mnem(t->name)) == nil) {
        parse_err(1, t, "unknown instruction '%s'\n", t->name);
        del_asm86(a);
        return nil;
    }
    a->opcode= m->opcode;
    a->optype= m->optype;
    if (a->opcode == CBW || a->opcode == CWD) {
        /* cbw/cwd are the 16 bit spellings, cwde/cdq the 32 bit ones;
         * the operand type depends on whether the spelling agrees with
         * the current use16() mode.
         */
        a->optype= (strcmp(t->name, "cbw") == 0
            || strcmp(t->name, "cwd") == 0) == use16() ? WORD : OWORD;
    }

    /* Preset the operand types to the default of this mnemonic, the
     * operand parser overwrites them for operands that tell their size.
     */
    for (op_idx= 0; op_idx < arraysize(optypes); op_idx++)
        optypes[op_idx]= m->optype;
    op_idx= 0;

    n= 1;
    if (get_token(1)->symbol != ';'
            && (a->args= bas_get_oplist(&n)) == nil) {
        del_asm86(a);
        return nil;
    }

    if (m->optype == WORD) {
        /* Does one of the operands override the optype? */
        for (op_idx= 0; op_idx < arraysize(optypes); op_idx++) {
            if (optypes[op_idx] != m->optype)
                a->optype= optypes[op_idx];
        }
    }

    if (get_token(n)->symbol != ';') {
        parse_err(1, t, "garbage at end of instruction\n");
        del_asm86(a);
        return nil;
    }

    switch (a->opcode) {
    case DOT_ALIGN:
        /* Restrict .align to have a single numeric argument, some
         * assemblers think of the argument as a power of two, so
         * we need to be able to change the value.
         */
        if (strcmp(t->name, ".even") == 0 && a->args == nil) {
            /* .even becomes .align 2. */
            expression_t *e;

            a->args= e= new_expr();
            e->operator= 'W';
            e->name= copystr("2");
            e->len= 2;
        }
        if (a->args == nil || a->args->operator != 'W'
                    || !isanumber(a->args->name)) {
            parse_err(1, t,
              ".align is restricted to one numeric argument\n");
            del_asm86(a);
            return nil;
        }
        break;
    case MOVSX:
    case MOVZX:
        /* Types of both operands tell the instruction type. */
        a->optype= optypes[0];
        if (optypes[1] == BYTE) {
            a->opcode= a->opcode == MOVSX ? MOVSXB : MOVZXB;
        }
        break;
    case SAL:
    case SAR:
    case SHL:
    case SHR:
    case RCL:
    case RCR:
    case ROL:
    case ROR:
        /* Only the first operand tells the operand size. */
        a->optype= optypes[0];
        break;
    default:;
    }

    /* Skip the statement and the ';' that ends it. */
    skip_token(n+1);
    return a;
}
asm86_t *bas_get_instruction(void)
/* Read the next instruction, pseudo op, label or assignment from the BAS
 * source and return it as a new asm86_t, or nil at end of file.
 *
 * Empty statements are skipped.  "# line "file"" preprocessor lines update
 * the current file and line and are otherwise ignored.  A line with a
 * syntax error is reported, zapped, and parsing continues with the next
 * line, so the result may well be nil if the offending line was the last.
 */
{
    asm86_t *a= nil;
    expression_t *e;
    token_t *t;

    /* Skip empty statements. */
    while ((t= get_token(0))->symbol == ';')
        skip_token(1);

    if (t->type == T_EOF) return nil;

    if (t->symbol == '#') {
        /* Preprocessor line and file change. */

        if ((t= get_token(1))->type != T_WORD || !isanumber(t->name)
            || get_token(2)->type != T_STRING
        ) {
            parse_err(1, t, "file not preprocessed?\n");
            zap();
        } else {
            set_file(get_token(2)->name,
                strtol(get_token(1)->name, nil, 0) - 1);

            /* GNU CPP adds extra cruft, simply zap the line. */
            zap();
        }
        a= bas_get_instruction();
    } else
    if (t->type == T_WORD && get_token(1)->symbol == ':') {
        /* A label definition. */
        a= new_asm86();
        a->line= t->line;
        a->opcode= DOT_LABEL;
        a->optype= PSEUDO;
        a->args= e= new_expr();
        e->operator= ':';
        e->name= copystr(t->name);
        skip_token(2);
    } else
    if (t->type == T_WORD && get_token(1)->symbol == '=') {
        /* An assignment: name = expression. */
        int n= 2;

        if ((e= bas_get_C_expression(&n)) == nil) {
            zap();
            a= bas_get_instruction();
        } else
        if (get_token(n)->symbol != ';') {
            parse_err(1, t, "garbage after assignment\n");
            zap();
            a= bas_get_instruction();
        } else {
            a= new_asm86();
            a->line= t->line;
            a->opcode= DOT_EQU;
            a->optype= PSEUDO;
            a->args= new_expr();
            a->args->operator= '=';
            a->args->name= copystr(t->name);
            a->args->middle= e;
            skip_token(n+1);
        }
    } else
    if (t->type == T_WORD && get_token(1)->type == T_WORD
                && strcmp(get_token(1)->name, "lcomm") == 0) {
        /* Local common block definition: name lcomm size. */
        int n= 2;

        if ((e= bas_get_C_expression(&n)) == nil) {
            zap();
            a= bas_get_instruction();
        } else
        if (get_token(n)->symbol != ';') {
            parse_err(1, t, "garbage after lcomm\n");
            zap();
            a= bas_get_instruction();
        } else {
            a= new_asm86();
            a->line= t->line;
            a->opcode= DOT_LCOMM;
            a->optype= PSEUDO;
            /* Arguments are (name, size). */
            a->args= new_expr();
            a->args->operator= ',';
            a->args->right= e;
            a->args->left= e= new_expr();
            e->operator= 'W';
            e->name= copystr(t->name);
            e->len= strlen(e->name)+1;
            skip_token(n+1);
        }
    } else
    if (t->type == T_WORD) {
        /* Anything else starting with a word is a statement. */
        if ((a= bas_get_statement()) == nil) {
            zap();
            a= bas_get_instruction();
        }
    } else {
        parse_err(1, t, "syntax error\n");
        zap();
        a= bas_get_instruction();
    }

    /* The recursive calls above return nil if end of file follows the
     * skipped line, so 'a' must be checked before it is used.  An OWORD
     * operand type is turned into WORD with the OPZ flag set in 'oaz'.
     */
    if (a != nil && a->optype == OWORD) {
        a->optype= WORD;
        a->oaz|= OPZ;
    }
    return a;
}