2005-04-21 16:53:53 +02:00
|
|
|
/* join - relational database operator		Author: Saeko Hirabayashi */
|
|
|
|
|
|
|
|
/* Written by Saeko Hirabayashi, 1989.
|
|
|
|
* 1992-01-28 Modified by Kouichi Hirabayashi to add some POSIX1003.2 options.
|
|
|
|
*
|
|
|
|
 * This is a free program.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
#define MAXFLD 200 /* maximum # of fields to accept */
|
|
|
|
|
2012-03-24 16:16:34 +01:00
|
|
|
/* Entry point and diagnostics. */
int main(int argc, char **argv);
void usage(void);
void error(char *s, char *t);

/* Join driver: compare() picks which of the three handlers runs. */
int compare(void);
void match(void);
void f1_only(void);
void f2_only(void);

/* Output routines. */
void output(int flag);
void outfld(int file);
void outputf(int flag);

/* Input routines. */
int get1(void);
int get2(int back);
int getrec(int file);
int split(int file);
FILE *efopen(char *file, char *mode);

/* Library routines, declared here by hand rather than through a header. */
int atoi(char *str);
int exit(int val);

/* Routine used to print a joined line: output() unless -o selects outputf(). */
void (*outfun)(int file);
|
2005-04-21 16:53:53 +02:00
|
|
|
|
|
|
|
/* Bit masks naming the two input files in aflag, vflag and output flags. */
#define F1 1
#define F2 2

/* Separator written between output fields: the -t character, else a blank. */
#define SEP (sep ? sep : ' ')

/* Input streams: fp[0] is file1, fp[1] is file2. */
FILE *fp[2];

/* Offset in file2 of the head of the current group of equal keys. */
long head;

/* Per-file line buffer, field pointers into that buffer, and field count. */
char buf[2][BUFSIZ];
char *fld[2][MAXFLD];
int nfld[2];

/* Zero-based position of the key field in file1 and in file2. */
int kpos[2];

/* Key of the previous file1 record. */
char oldkey[BUFSIZ];

/* Output list built from the -o option. */
struct {
    int o_file;                 /* file index: 0 or 1 */
    int o_field;                /* field index: 0, 1, 2, ... */
} olist[MAXFLD];

/* Number of entries in olist[]. */
int nout;

int aflag;                      /* -a n: F1, F2 or both */
int vflag;                      /* -v n: F1, F2 or both */
char *es;                       /* string given with -e */
char sep;                       /* field separator given with -t */
char *cmd;                      /* name of this program */
|
|
|
|
|
2010-07-06 14:10:23 +02:00
|
|
|
int main(argc, argv)
|
2005-04-21 16:53:53 +02:00
|
|
|
int argc;
|
|
|
|
char **argv;
|
|
|
|
{
|
|
|
|
register char *s;
|
|
|
|
int c, i, j;
|
|
|
|
|
|
|
|
cmd = argv[0];
|
|
|
|
outfun = output; /* default output form */
|
|
|
|
|
|
|
|
while (--argc > 0 && (*++argv)[0] == '-' && (*argv)[1]) {
|
|
|
|
/* "-" is a file name (stdin) */
|
|
|
|
s = argv[0] + 1;
|
|
|
|
if ((c = *s) == '-' && !s[1]) {
|
|
|
|
++argv;
|
|
|
|
--argc;
|
|
|
|
break; /* -- */
|
|
|
|
}
|
|
|
|
if (*++s == '\0') {
|
|
|
|
s = *++argv;
|
|
|
|
--argc;
|
|
|
|
}
|
|
|
|
switch (c) {
|
|
|
|
case 'a': /* add unpairable line to output */
|
|
|
|
vflag = 0;
|
|
|
|
switch (*s) {
|
|
|
|
case '1': aflag |= F1; break;
|
|
|
|
case '2': aflag |= F2; break;
|
|
|
|
default: aflag |= (F1 | F2); break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'e': /* replace empty field by es */
|
|
|
|
es = s;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'j': /* key field (obsolute) */
|
|
|
|
c = *s++;
|
|
|
|
if (*s == '\0') {
|
|
|
|
s = *++argv;
|
|
|
|
--argc;
|
|
|
|
}
|
|
|
|
|
|
|
|
case '1': /* key field of file1 */
|
|
|
|
case '2': /* key field of file2 */
|
|
|
|
i = atoi(s) - 1;
|
|
|
|
|
|
|
|
switch (c) {
|
|
|
|
case '1': kpos[0] = i; break;
|
|
|
|
case '2': kpos[1] = i; break;
|
|
|
|
default: kpos[0] = kpos[1] = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'o': /* specify output format */
|
|
|
|
do {
|
|
|
|
i = j = 0;
|
|
|
|
sscanf(s, "%d.%d", &i, &j);
|
|
|
|
if (i < 1 || j < 1 || i > 2) usage();
|
|
|
|
olist[nout].o_file = i - 1;
|
|
|
|
olist[nout].o_field = j - 1;
|
|
|
|
nout++;
|
|
|
|
if ((s = strchr(s, ',')) != (char *) 0)
|
|
|
|
s++;
|
|
|
|
else {
|
|
|
|
s = *++argv;
|
|
|
|
--argc;
|
|
|
|
}
|
|
|
|
} while (argc > 2 && *s != '-');
|
|
|
|
++argc;
|
|
|
|
--argv; /* compensation */
|
|
|
|
outfun = outputf;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 't': /* tab char */
|
|
|
|
sep = *s;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'v': /* output unpairable line only */
|
|
|
|
aflag = 0;
|
|
|
|
switch (*s) {
|
|
|
|
case '1': vflag |= F1; break;
|
|
|
|
case '2': vflag |= F2; break;
|
|
|
|
default: vflag |= (F1 | F2); break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
default: usage();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (argc != 2) usage();
|
|
|
|
|
|
|
|
fp[0] = strcmp(argv[0], "-") ? efopen(argv[0], "r") : stdin;
|
|
|
|
fp[1] = efopen(argv[1], "r");
|
|
|
|
|
|
|
|
nfld[0] = get1(); /* read file1 */
|
|
|
|
nfld[1] = get2(0); /* read file2 */
|
|
|
|
|
|
|
|
while (nfld[0] || nfld[1]) {
|
|
|
|
if ((i = compare()) == 0)
|
|
|
|
match();
|
|
|
|
else if (i < 0)
|
|
|
|
f1_only();
|
|
|
|
else
|
|
|
|
f2_only();
|
|
|
|
}
|
|
|
|
fflush(stdout);
|
|
|
|
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
void usage()
|
|
|
|
{
|
|
|
|
fprintf(stderr,
|
|
|
|
"Usage: %s [-an|-vn] [-e str] [-o list] [-tc] [-1 f] [-2 f] file1 file2\n",
|
|
|
|
cmd);
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
int compare()
|
|
|
|
{ /* compare key field */
|
|
|
|
register int r;
|
|
|
|
|
|
|
|
if (nfld[1] == 0) /* file2 EOF */
|
|
|
|
r = -1;
|
|
|
|
else if (nfld[0] == 0) /* file1 EOF */
|
|
|
|
r = 1;
|
|
|
|
else {
|
|
|
|
if (nfld[0] <= kpos[0])
|
|
|
|
error("missing key field in file1", (char *) 0);
|
|
|
|
if (nfld[1] <= kpos[1])
|
|
|
|
error("missing key field in file2", (char *) 0);
|
|
|
|
|
|
|
|
r = strcmp(fld[0][kpos[0]], fld[1][kpos[1]]);
|
|
|
|
}
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
void match()
|
|
|
|
{
|
|
|
|
long p;
|
|
|
|
|
|
|
|
if (!vflag) (*outfun) (F1 | F2);
|
|
|
|
|
|
|
|
p = ftell(fp[1]);
|
|
|
|
nfld[1] = get2(0); /* check key order */
|
|
|
|
if (nfld[1] == 0 || strcmp(fld[0][kpos[0]], fld[1][kpos[1]])) {
|
|
|
|
nfld[0] = get1();
|
|
|
|
if (strcmp(fld[0][kpos[0]], oldkey) == 0) {
|
|
|
|
fseek(fp[1], head, 0); /* re-do from head */
|
|
|
|
nfld[1] = get2(1); /* don't check key order */
|
|
|
|
} else
|
|
|
|
head = p; /* mark here */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void f1_only()
|
|
|
|
{
|
|
|
|
if ((aflag & F1) || (vflag & F1)) (*outfun) (F1);
|
|
|
|
nfld[0] = get1();
|
|
|
|
}
|
|
|
|
|
|
|
|
void f2_only()
|
|
|
|
{
|
|
|
|
if ((aflag & F2) || (vflag & F2)) (*outfun) (F2);
|
|
|
|
head = ftell(fp[1]); /* mark */
|
|
|
|
nfld[1] = get2(0); /* check key order */
|
|
|
|
}
|
|
|
|
|
|
|
|
void output(f)
|
|
|
|
{ /* default output form */
|
|
|
|
if (f & F1)
|
|
|
|
fputs(fld[0][kpos[0]], stdout);
|
|
|
|
else
|
|
|
|
fputs(fld[1][kpos[1]], stdout);
|
|
|
|
if (f & F1) outfld(0);
|
|
|
|
if (f & F2) outfld(1);
|
|
|
|
fputc('\n', stdout);
|
|
|
|
}
|
|
|
|
|
|
|
|
void outfld(file)
|
|
|
|
{ /* output all fields except key_field */
|
|
|
|
register int i;
|
|
|
|
int k, n;
|
|
|
|
|
|
|
|
k = kpos[file];
|
|
|
|
n = nfld[file];
|
|
|
|
for (i = 0; i < n; i++)
|
|
|
|
if (i != k) {
|
|
|
|
fputc(SEP, stdout);
|
|
|
|
fputs(fld[file][i], stdout);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void outputf(f)
|
|
|
|
{ /* output by '-o list' */
|
|
|
|
int i, j, k;
|
|
|
|
register char *s;
|
|
|
|
|
|
|
|
for (i = k = 0; i < nout; i++) {
|
|
|
|
j = olist[i].o_file;
|
|
|
|
if ((f & (j + 1)) && (olist[i].o_field < nfld[j]))
|
|
|
|
s = fld[j][olist[i].o_field];
|
|
|
|
else
|
|
|
|
s = es;
|
|
|
|
if (s) {
|
|
|
|
if (k++) fputc(SEP, stdout);
|
|
|
|
fputs(s, stdout);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fputc('\n', stdout);
|
|
|
|
}
|
|
|
|
|
|
|
|
int get1()
|
|
|
|
{ /* read file1 */
|
|
|
|
int r;
|
|
|
|
static char oldkey1[BUFSIZ];
|
|
|
|
|
|
|
|
if (fld[0][kpos[0]]) {
|
|
|
|
strcpy(oldkey, fld[0][kpos[0]]); /* save previous key for control */
|
|
|
|
}
|
|
|
|
r = getrec(0);
|
|
|
|
|
|
|
|
if (r) {
|
|
|
|
if (strcmp(oldkey1, fld[0][kpos[0]]) > 0)
|
|
|
|
error("file1 is not sorted", (char *) 0);
|
|
|
|
strcpy(oldkey1, fld[0][kpos[0]]); /* save prev key for sort check */
|
|
|
|
}
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int get2(back)
|
|
|
|
{ /* read file2 */
|
|
|
|
static char oldkey2[BUFSIZ];
|
|
|
|
int r;
|
|
|
|
|
|
|
|
r = getrec(1);
|
|
|
|
|
|
|
|
if (r) {
|
|
|
|
if (!back && strcmp(oldkey2, fld[1][kpos[1]]) > 0)
|
|
|
|
error("file2 is not sorted", (char *) 0);
|
|
|
|
strcpy(oldkey2, fld[1][kpos[1]]); /* save prev key for sort check */
|
|
|
|
}
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int getrec(file)
|
|
|
|
{ /* read one line to split it */
|
|
|
|
if (fgets(buf[file], BUFSIZ, fp[file]) == (char *) 0)
|
|
|
|
*buf[file] = '\0';
|
|
|
|
else if (*buf[file] == '\n' || *buf[file] == '\r')
|
|
|
|
error("null line in file%s", file ? "1" : "0");
|
|
|
|
|
|
|
|
return split(file);
|
|
|
|
}
|
|
|
|
|
|
|
|
int split(file)
|
|
|
|
{ /* setup fields */
|
|
|
|
register int n;
|
|
|
|
register char *s, *t;
|
|
|
|
|
|
|
|
for (n = 0, s = buf[file]; *s && *s != '\n' && *s != '\r';) {
|
|
|
|
if (sep) {
|
|
|
|
for (t = s; *s && *s != sep && *s != '\n' && *s != '\r'; s++);
|
|
|
|
} else {
|
|
|
|
while (*s == ' ' || *s == '\t')
|
|
|
|
s++; /* skip leading white space */
|
|
|
|
for (t = s; *s && *s != ' ' && *s != '\t'
|
|
|
|
&& *s != '\n' && *s != '\r'; s++);
|
|
|
|
/* We will treat trailing white space as NULL field */
|
|
|
|
}
|
|
|
|
if (*s) *s++ = '\0';
|
|
|
|
fld[file][n++] = t;
|
|
|
|
if (n == MAXFLD) error("too many filed in file%s", file ? "1" : "0");
|
|
|
|
}
|
|
|
|
fld[file][n] = (char *) 0;
|
|
|
|
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * fopen() wrapper that reports a failure through error().
 *
 * file - path of the file to open.
 * mode - fopen() mode string.
 *
 * Returns the stream obtained from fopen().
 */
FILE *efopen(char *file, char *mode)
{
    FILE *stream = fopen(file, mode);

    if (stream == (FILE *) 0) {
        error("can't open %s", file);
    }
    return stream;
}
|
|
|
|
|
|
|
|
void error(s, t)
|
|
|
|
char *s, *t;
|
|
|
|
{
|
|
|
|
fprintf(stderr, "%s: ", cmd);
|
|
|
|
fprintf(stderr, s, t);
|
|
|
|
fprintf(stderr, "\n");
|
|
|
|
|
|
|
|
exit(1);
|
|
|
|
}
|