/* $NetBSD: ccl.c,v 1.1.1.1 2009/10/26 00:25:06 christos Exp $ */ /* ccl - routines for character classes */ /* Copyright (c) 1990 The Regents of the University of California. */ /* All rights reserved. */ /* This code is derived from software contributed to Berkeley by */ /* Vern Paxson. */ /* The United States Government has rights in this work pursuant */ /* to contract no. DE-AC03-76SF00098 between the United States */ /* Department of Energy and the University of California. */ /* This file is part of flex. */ /* Redistribution and use in source and binary forms, with or without */ /* modification, are permitted provided that the following conditions */ /* are met: */ /* 1. Redistributions of source code must retain the above copyright */ /* notice, this list of conditions and the following disclaimer. */ /* 2. Redistributions in binary form must reproduce the above copyright */ /* notice, this list of conditions and the following disclaimer in the */ /* documentation and/or other materials provided with the distribution. */ /* Neither the name of the University nor the names of its contributors */ /* may be used to endorse or promote products derived from this software */ /* without specific prior written permission. */ /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */ /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */ /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ /* PURPOSE. */ #include "flexdef.h" /* return true if the chr is in the ccl. Takes negation into account. */ static bool ccl_contains (const int cclp, const int ch) { int ind, len, i; len = ccllen[cclp]; ind = cclmap[cclp]; for (i = 0; i < len; ++i) if (ccltbl[ind + i] == ch) return !cclng[cclp]; return cclng[cclp]; } /* ccladd - add a single character to a ccl */ void ccladd (cclp, ch) int cclp; int ch; { int ind, len, newpos, i; check_char (ch); len = ccllen[cclp]; ind = cclmap[cclp]; /* check to see if the character is already in the ccl */ for (i = 0; i < len; ++i) if (ccltbl[ind + i] == ch) return; /* mark newlines */ if (ch == nlch) ccl_has_nl[cclp] = true; newpos = ind + len; if (newpos >= current_max_ccl_tbl_size) { current_max_ccl_tbl_size += MAX_CCL_TBL_SIZE_INCREMENT; ++num_reallocs; ccltbl = reallocate_Character_array (ccltbl, current_max_ccl_tbl_size); } ccllen[cclp] = len + 1; ccltbl[newpos] = ch; } /* dump_cclp - same thing as list_character_set, but for cclps. */ static void dump_cclp (FILE* file, int cclp) { register int i; putc ('[', file); for (i = 0; i < csize; ++i) { if (ccl_contains(cclp, i)){ register int start_char = i; putc (' ', file); fputs (readable_form (i), file); while (++i < csize && ccl_contains(cclp,i)) ; if (i - 1 > start_char) /* this was a run */ fprintf (file, "-%s", readable_form (i - 1)); putc (' ', file); } } putc (']', file); } /* ccl_set_diff - create a new ccl as the set difference of the two given ccls. */ int ccl_set_diff (int a, int b) { int d, ch; /* create new class */ d = cclinit(); /* In order to handle negation, we spin through all possible chars, * addding each char in a that is not in b. * (This could be O(n^2), but n is small and bounded.) */ for ( ch = 0; ch < csize; ++ch ) if (ccl_contains (a, ch) && !ccl_contains(b, ch)) ccladd (d, ch); /* debug */ if (0){ fprintf(stderr, "ccl_set_diff ("); fprintf(stderr, "\n "); dump_cclp (stderr, a); fprintf(stderr, "\n "); dump_cclp (stderr, b); fprintf(stderr, "\n "); dump_cclp (stderr, d); fprintf(stderr, "\n)\n"); } return d; } /* ccl_set_union - create a new ccl as the set union of the two given ccls. */ int ccl_set_union (int a, int b) { int d, i; /* create new class */ d = cclinit(); /* Add all of a */ for (i = 0; i < ccllen[a]; ++i) ccladd (d, ccltbl[cclmap[a] + i]); /* Add all of b */ for (i = 0; i < ccllen[b]; ++i) ccladd (d, ccltbl[cclmap[b] + i]); /* debug */ if (0){ fprintf(stderr, "ccl_set_union (%d + %d = %d", a, b, d); fprintf(stderr, "\n "); dump_cclp (stderr, a); fprintf(stderr, "\n "); dump_cclp (stderr, b); fprintf(stderr, "\n "); dump_cclp (stderr, d); fprintf(stderr, "\n)\n"); } return d; } /* cclinit - return an empty ccl */ int cclinit () { if (++lastccl >= current_maxccls) { current_maxccls += MAX_CCLS_INCREMENT; ++num_reallocs; cclmap = reallocate_integer_array (cclmap, current_maxccls); ccllen = reallocate_integer_array (ccllen, current_maxccls); cclng = reallocate_integer_array (cclng, current_maxccls); ccl_has_nl = reallocate_bool_array (ccl_has_nl, current_maxccls); } if (lastccl == 1) /* we're making the first ccl */ cclmap[lastccl] = 0; else /* The new pointer is just past the end of the last ccl. * Since the cclmap points to the \first/ character of a * ccl, adding the length of the ccl to the cclmap pointer * will produce a cursor to the first free space. */ cclmap[lastccl] = cclmap[lastccl - 1] + ccllen[lastccl - 1]; ccllen[lastccl] = 0; cclng[lastccl] = 0; /* ccl's start out life un-negated */ ccl_has_nl[lastccl] = false; return lastccl; } /* cclnegate - negate the given ccl */ void cclnegate (cclp) int cclp; { cclng[cclp] = 1; ccl_has_nl[cclp] = !ccl_has_nl[cclp]; } /* list_character_set - list the members of a set of characters in CCL form * * Writes to the given file a character-class representation of those * characters present in the given CCL. A character is present if it * has a non-zero value in the cset array. */ void list_character_set (file, cset) FILE *file; int cset[]; { register int i; putc ('[', file); for (i = 0; i < csize; ++i) { if (cset[i]) { register int start_char = i; putc (' ', file); fputs (readable_form (i), file); while (++i < csize && cset[i]) ; if (i - 1 > start_char) /* this was a run */ fprintf (file, "-%s", readable_form (i - 1)); putc (' ', file); } } putc (']', file); } /** Determines if the range [c1-c2] is unambiguous in a case-insensitive * scanner. Specifically, if a lowercase or uppercase character, x, is in the * range [c1-c2], then we require that UPPERCASE(x) and LOWERCASE(x) must also * be in the range. If not, then this range is ambiguous, and the function * returns false. For example, [@-_] spans [a-z] but not [A-Z]. Beware that * [a-z] will be labeled ambiguous because it does not include [A-Z]. * * @param c1 the lower end of the range * @param c2 the upper end of the range * @return true if [c1-c2] is not ambiguous for a caseless scanner. */ bool range_covers_case (int c1, int c2) { int i, o; for (i = c1; i <= c2; i++) { if (has_case (i)) { o = reverse_case (i); if (o < c1 || c2 < o) return false; } } return true; } /** Reverse the case of a character, if possible. * @return c if case-reversal does not apply. */ int reverse_case (int c) { return isupper (c) ? tolower (c) : (islower (c) ? toupper (c) : c); } /** Return true if c is uppercase or lowercase. */ bool has_case (int c) { return (isupper (c) || islower (c)) ? true : false; }