/* index sequential files, producing .idx, .sel and .pos files */
/* Bruce Tanner - Cerritos College */

/*
Version History:

1.0  05/10/93 Original attempt
2.0  06/20/93 Create indexed files directly, add keyword count field
2.1  07/08/93 Change the file name for NOISE_WORDS
2.2  07/08/93 Move the range end (end_pos) to before the terminator
2.2jlw 07/14/93 - JLW added length spec to dash, added additional topic
                  divider keywords
2.3  07/19/93 Set multi-buffer, multi-block counts, read-ahead, write-behind
              and deferred write; noticeably increased performance
2.4  07/26/93 Removed index name, added CLI$ interface, added /TOPIC
2.4jlw 07/27/93 fixed version retention, which was broken
2.5  07/27/93 Selector strings forced to lowercase; use a good copy
2.6  07/29/93 revamp /TOPIC syntax to include text, size, exclude
2.7  07/30/93 make SIZE=n pad as well as truncate field width
2.8  08/03/93 take wildcard input file names, add /OUTPUT, /VERSION
2.9  08/05/93 JLW changed filename sizes from 80 to 256 characters
2.10 08/05/93 add check for max number of topics, reformat code
2.11 08/24/93 JLW added specific statuses for exit errors
2.12 10/01/93 add /NODEFAULT_TOPIC to omit topics that have no topic keyword
2.13 11/03/93 add /LINK to generate .link file instead of .idx/.sel
2.14 11/15/93 add /NOISE=file to specify the noise words file
2.15 11/17/93 add /TOPIC=(position), /FIELD=(position, size), /PUNCTUATION
2.16 11/18/93 fix illegal strcpy for AXP, add /MAX_TOPICS
2.17 11/21/93 make load_noise friendlier, add /NOPUNCTUATION support
2.18 11/27/93 add /MINIMUM_WORD, /COUNT_WORDS
2.19 11/30/93 fix broken /TOPIC
2.20 03/20/94 sort words, add /LINK=SORT, /SEQUENTIAL, remove /COUNT_WORDS
2.21 04/29/94 add /NONUMBERS
2.22 06/23/94 add /TOPIC=(offset) /TOPIC=(position=0)
2.23 06/24/94 add /TOPIC=(end)
2.24 06/27/94 add /CANDIDATE, /KEYWORD=(text,end,exclude)
2.24a 06/29/94 replaced VAXC-specific "#include foo" declarations with
		more portable "#include <foo.h>" (so DECC won't balk).
2.25 08/04/94 fix /TOPIC=END not matching
2.26 09/15/94 /KEYWORD=END=foo stopped at end of line if 'foo' wasn't found
2.27 09/27/94 change get_text() to return updated pointer to fix mangled text
3.0  09/29/94 redo parsing routines, add /SPECIFICATION, /TOPIC=BREAK
3.1  10/10/94 add /SELECTOR, don't index selector line
3.2  10/17/94 change /SELECTOR to /SELECTOR=(TEXT,END,BOTH)
3.3  11/04/94 add /KEYWORD=(offset), extend selector to include host/port
3.4  11/07/94 add /HELPFILE /SELECTOR=IGNORE
3.5  12/16/94 move close of link file for wildcards
3.6  01/02/95 program around selector.end default problem
3.7  03/18/95 add /TOPIC=LITERAL
3.8  04/28/95 fix problems with /LINK/WHOLE and /LINK/OUTPUT
4.0  05/16/95 add word position code
4.1  06/10/95 add /URL, convert selector file items to URLs
4.2  06/25/95 add /NOPOSITION, Joel Snyder's /COUNT
4.3  11/12/95 merge partial /URL and existing file name.
4.4  11/27/95 change format of selector
4.5  01/01/96 handle case where topic.text is used by another qualifier
4.6  01/02/96 add /EXCLUDE, /QUIET
4.7  01/04/96 change default to /NOVERSION, fix url.default and /HELP bugs
     01/07/96 add /URL=PREFIX, /URL=BOTH
     01/11/96 changed _tolower() to tolower() for GNU C
     01/12/96 add /TOPIC=FIRST
     01/14/96 add Arne Vajhøj's international toupper/tolower code
4.8  03/08/96 add Dave Smith's fixes for Gopher selectors
4.9  04/21/96 add /TOPIC=FILE=FULL, fix /TOPIC=BREAK
4.10 05/04/96 detect word index field overflow.  Add /TOPIC=DEFAULT
4.11 07/08/96 remove the restriction that a line can only satisfy one /TOPIC
4.12 09/22/96 add Malcolm Dunnett's wildcard read error processing code
     11/01/96 add /URL=FRAGMENT

	9/6/00	RDP	To satisfy the DEC C compiler, pass a dummy file
			pointer instead of lnk on the first call of
			write_words.  lnk is still undefined at that point;
			it is never actually used there because of the
			switch values tested in the if statements.
*/

/* version and date string printed in the usage banner by main() */
#define VERSION "4.12  11/01/96"
 
#include <ssdef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <rms.h>
#include <rmsdef.h>
#include <descrip.h>
#include <climsgdef.h>
#include <lib$routines.h>
#include <starlet.h>
#include <fscndef.h>
#include <str$routines.h>
#include <strdef.h>
#include "intctype.h"

/* compile-time limits and defaults */
#define CHUNK 100         /* increment to expand table of words */
#define DESC_SIZE 500     /* maximum size of a topic description */
#define SELECTOR_SIZE 200 /* maximum size of a selector (minus description) */
#define TOPIC_SIZE 20     /* maximum number of topics to list; also the size
                             of the topics[] and keywords[] arrays */
/* default set of word-separating characters; main() copies this into its
   punctuation buffer before applying /PUNCTUATION or /NOPUNCTUATION */
#define PUNCT_CHARS  "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
#define DEFAULT_POS 0   /* column to look for topic by default; 0 = anywhere */
/* size of the line buffers main() uses for each source line */
#define MAX_INPUT_LINE 500
/* key of the special index record that main() looks up for /ADD; the counters
   stored after the key seed db_index and word_index */
#define MAGIC_COOKIE "MAX#INDEX"

typedef struct {
    char *literal;
    char *text;
    char *found;
    char *end;
    char *deftext;
    int  pos;
    int  size;
    int  offset;
    int  exclude;
    int  force_break;
    int  used;
    int  first;
    int  file;
} topic_str;

/*
 * One entry of the global switches[] table that is handed to
 * parse_commands().
 * NOTE(review): presumably one entry per qualifier, queried through
 * switch_present() and switch_value() -- confirm against those routines.
 */
typedef struct {
    char *name;     /* presumably the qualifier name -- confirm */
    int  state;     /* presumably the present/negated state -- confirm */
    int  multiple;  /* 0 = no list, 1 = unread list member, 2 = already read */
    char *value;    /* presumably the qualifier's value string -- confirm */
} switch_str;

/*
 * Element type of the dynamically sized words, noise and candidate tables.
 */
typedef struct {
    char *str;  /* presumably the word text -- confirm */
    int  pos;   /* presumably the word's position -- confirm against build_words() */
} table_str;

/* shorthand for the VMS string descriptor type */
typedef struct dsc$descriptor_s dsc;
/*
 * How the source text is divided into entries; main() picks the value from
 * the qualifiers and starts with whole:
 *   para  - /PARAGRAPH           hex   - /FF or /CHARACTER=n
 *   dash  - /DASH=n              equal - /EQUAL=n
 *   line  - /LINE                whole - /WHOLE
 *   field - /FIELD=(position,size)
 *   force - not selected by any qualifier in the visible part of main();
 *           presumably a forced break (see /TOPIC=BREAK) -- confirm
 */
typedef enum {para, dash, hex, equal, line, whole, field, force} sep_type;

/*
 * Word tables.  main() zeroes each size/index pair and NULLs each table
 * pointer before any words are loaded; noise and candidate are then filled
 * by load_words().
 * NOTE(review): *_size is presumably the allocated capacity and *_index the
 * number of entries in use -- confirm against expand_table().
 */
int words_index, words_size;
table_str *words, *noise, *candidate;
int noise_index, noise_size;
int candidate_index, candidate_size;
/*
 * Field widths (in bytes) used to size the record buffers and RMS keys.
 * max_word is set by /WORD_LENGTH; /MAX_TOPICS sets max_topic, max_count,
 * max_wid and max_pos to the same value.  sequential is non-zero when
 * /SEQUENTIAL was given.
 */
int max_word, max_topic, max_count, max_wid, max_pos, sequential;
/* record and key buffers; main() allocates them once the widths are known */
char *idx_record, *idx_key, *prev_keyword, *pos_record;
/* record buffer attached to the selector file's RAB */
char sel_record[DESC_SIZE + SELECTOR_SIZE + 100];
int keyword_index;
/* stream for the /SPECIFICATION file, opened in main() */
FILE *spc;
/* table passed to parse_commands() */
switch_str switches[100];
/* topic and keyword rules; cleared by main() at startup */
topic_str topics[TOPIC_SIZE];
topic_str keywords[TOPIC_SIZE];
/* main() starts each of these as an allocated empty string; selector_spec is
   replaced by the /HELPFILE selector when one is given */
char *selector_spec, *url_spec, *url_fragment;
/* reloaded from the magic-cookie record for /ADD; forced to 0 by /NOPOSITION */
int word_index = 1;
/* reloaded from the magic-cookie record for /ADD */
int db_index = 0;

/* VMS CLI$ routines, declared here without parameter lists */
int cli$dcl_parse();
int cli$get_value();
int cli$present();

/*
 * Forward declarations for the routines main() calls.
 * NOTE(review): presumably all defined later in this file -- confirm.
 */
void find_eof(struct RAB *);
void build_words(char *, char *, int, int *);
void test_words(char *, char *, int, topic_str *, int *);
void expand_table(table_str **, int *);
void write_words(FILE *, FILE *, struct RAB *, struct RAB *, struct RAB *,
                 int *, int *, char *, topic_str *, sep_type, int *, char *,
                 int);
void load_words(char *, char *, table_str **, int *, int *);
int is_noise(char *, int, int);
int is_candidate(char *, int);
int is_punct(char, char *);
int is_spaces(char *, int, int);
dsc *descr(char *);
void parse_topic(char *, topic_str *);
void parse_keyword(char *, topic_str *);
void *my_realloc(void *, int);
/* handed to cli$dcl_parse() as its second argument in main() */
void index_commands();
int find_str(char *, char *);
void parse_commands(dsc *, switch_str[]);
int switch_present(char *);
char *switch_value(char *);
void lower(char *);
void check_alloc(void *);
int exclude(char *);
void make_valid(char *);


main(int argc, char *argv[])
{
    FILE  *src, *lnk, *dummy;
    char  *cp, *cp2, *ptr, desc[DESC_SIZE + 1], src_line[MAX_INPUT_LINE];
    static char cli_input[256], punctuation[128], temp_punct[128];
    static char value[20], file_arg[256], file_spec[256], out_name[256];
    static char spec_name[256], spec_line[270], temp_number[20];
    char  orig_line[MAX_INPUT_LINE], lc_line[MAX_INPUT_LINE], temp_line[MAX_INPUT_LINE];
    char  spaces[DESC_SIZE + 1], help_index[10];
    int   start_pos, end_pos, status, index, word_pos, context = 0;
    sep_type type = whole;
    int   dash_len = 0, ind, minimum_word, where, first_time = TRUE;
    int   hex_value, field_pos = 1, field_size, zero, prefix;
    int   max_lines = 0, read_lines = 0; /* jms/950422 */
    short leng;
    char  *dashes = NULL;
    struct FAB idxfab, selfab, posfab;
    struct RAB idxrab, selrab, posrab;
    struct XABKEY idxxab, selxab, posxab;
    $DESCRIPTOR(input_dsc, cli_input);
    $DESCRIPTOR(file_dsc, file_arg);
    $DESCRIPTOR(file_spec_dsc, file_spec);
    $DESCRIPTOR(out_dsc, out_name);
    $DESCRIPTOR(punct_dsc, temp_punct);
    $DESCRIPTOR(value_dsc, value);
    $DESCRIPTOR(spec_dsc, spec_name);
    $DESCRIPTOR(spec_line_dsc, spec_line);

    /* initialize the topics and keywords arrays */
    for (index = 0; index < TOPIC_SIZE; index++) {
        topics[index].literal = NULL;
        topics[index].text = NULL;
        topics[index].end = NULL;
        topics[index].found = NULL;
        topics[index].deftext = NULL;
        topics[index].pos = 0;
        topics[index].size = -1;
        topics[index].offset = 0;
        topics[index].exclude = 0;
        topics[index].used = 0;
        topics[index].force_break = 0;
        topics[index].first = 0;
        topics[index].file = 0;

        keywords[index].literal = NULL;
        keywords[index].text = NULL;
        keywords[index].end = NULL;
        keywords[index].found = NULL;
        keywords[index].deftext = NULL;
        keywords[index].pos = 0;
        keywords[index].size = 0;
        keywords[index].offset = 0;
        keywords[index].exclude = 0;
        keywords[index].used = 0;
        keywords[index].force_break = 0;
        keywords[index].first = 0;
        keywords[index].file = 0;
    }

   /*
    * start up the CLI parse
    * add "index" to arg list and pass to cli$dcl_parse
    * this does not parse the individual qualifiers; that's done below
    */
    status = lib$get_foreign(&input_dsc, 0, &leng, 0);

    for (ind = leng; ind >= 0; ind--)
        cli_input[ind+6] = cli_input[ind];
    strncpy(cli_input, "index ", 6);            
    input_dsc.dsc$w_length = leng+6;

    status = cli$dcl_parse(&input_dsc, index_commands);

    if (status != CLI$_NORMAL)  /* error in parse, exit */
        exit(7);

    /* no source file given; provide a little help */
    if ((cli$present(descr("file")) & 1) == 0) {
        printf("BUILD_INDEX %s\n", VERSION);
        printf("Usage: index document\n");
        printf(" /ADD                update index files with source\n");
        printf(" /CANDIDATES=file    specify a file of words for index candidates\n");
        printf(" /CHARACTER=n        text separated by control character 'n'\n");
        printf(" /COUNT=n            stop after reading n lines\n");
        printf(" /DASH=n             text separated n dashes (default 3)\n");
        printf(" /[NO]DEFAULT_TOPIC  keep [discard] topics [not] matched by /TOPIC\n");
        printf(" /EQUAL=n            text separated n equals (default 80)\n");
        printf(" /FF                 text separated by form feeds\n");
        printf(" /FIELD=(position,size)  specify topic break on field\n");
        printf(" /HELPFILE=(selector,title)  file to match query \"?\"\n");
        printf(" /KEYWORD=(text,end,offset,exclude) specify indexing range\n");
        printf(" /LINE               each line is separate text entry\n");
        printf(" /LINK[=SORT]        generate .link file instead of .idx,.sel files\n");
        printf(" /MAX_TOPICS=n       maximum size of topic ID field (default 6)\n");
        printf(" /MINIMUM_WORD=n     define minimum word to index (default 3)\n");
        printf(" /NOISE=file         specify a file of words to omit in the index\n");
        printf(" /NONUMBERS          omit all numbers from the index\n");
        printf(" /OUTPUT=file        override name of index/selection files\n");
        printf(" /PARAGRAPH          text separated by blank lines\n");
        printf(" /[NO]POSITION       include [omit] word position information\n");
        printf(" /PUNCTUATION=\"...\"  specify the characters that separate words\n");
        printf(" /SELECTOR=(text,end,default,both,ignore) specify selectors to generate\n");
        printf(" /SEQUENTIAL         create sequential files (.seqidx, .seqsel)\n");
        printf(" /SPECIFICATION=file specify a file of qualifiers\n");
        printf(" /TOPIC=(text,end,position,size,offset,exclude,break) specify topic names\n");
        printf(" /URL=(text,end,default) specify selectors to generate\n");
        printf(" /[NO]VERSION        keep [discard] document version in selection\n");
        printf(" /WHOLE              whole file is one text entry\n");
        printf(" /WORD_LENGTH=n      maximum size of index key (default 20)\n");
        exit(1);
    }

    /* if there is a /SPECIFICATION=file, read it */
    if (cli$present(descr("specification")) & 1) { 
        status = cli$get_value(descr("specification"), &spec_dsc, &leng);
        spec_name[leng] = '\0';
        if ((spc = fopen(spec_name, "r")) == NULL) {
            printf("Can't read spec file %s\n", spec_name);
            exit(13);
        }
        /* and parse every line of the spec file */
        while (fgets(spec_line, sizeof(spec_line), spc)) {
            if ((spec_line[0] == '\n') || (spec_line[0] == '#') ||
                (spec_line[0] == '!'))
                continue;                 /* skip blank and comment lines */
            ptr = strchr(spec_line, '\n');
            if (ptr) *ptr = '\0';
            leng = strlen(spec_line);
            for (ind = leng; ind >= 0; ind--)
                spec_line[ind+6] = spec_line[ind];
            strncpy(spec_line, "index ", 6);            
            spec_line_dsc.dsc$w_length = leng+6;

            parse_commands(&spec_line_dsc, switches);
        }
    }

    /* then parse any other qualifiers on the command line */
    parse_commands(&input_dsc, switches);

    if (switch_present("paragraph"))
        type = para;
    if (switch_present("ff")) {
        type = hex;                      /* /FF same as /character=12 */
        hex_value = '\f';
    }
    if (switch_present("character")) {
        hex_value = atoi(switch_value("character"));
        type = hex;
    }
    if (switch_present("whole"))
        type = whole;
    if (switch_present("line"))
        type = line;
    if (switch_present("dash")) {
        dash_len = atoi(switch_value("dash"));
        type = dash;
    }
    if (switch_present("equal")) {
        dash_len = atoi(switch_value("equal"));
        type = equal;
    }
    if (switch_present("word_length")) {
        max_word = atoi(switch_value("word_length"));
    }
    if (switch_present("count")) {
        max_lines = atoi(switch_value("count"));
    }
    if (switch_present("field")) {
        type = field;
        field_pos = atoi(switch_value("field.position"));
        field_size = atoi(switch_value("field.size"));
    }
    strcpy(punctuation, PUNCT_CHARS);  /* default for /punctuation */
    if (switch_present("punctuation")) {
        strcpy(temp_punct, switch_value("punctuation"));
        if (temp_punct[0] == '"') {  /* if quoted string */
            strncpy(punctuation, temp_punct+1, leng-2);
            punctuation[leng-2] = '\0';
        }
        else if (strlen(temp_punct) > 0)
            strcpy(punctuation, temp_punct);
    }
    else {  /*  /nopunctuation="$" means exclude $ from punct chars */
        if (cp = switch_value("punctuation"))
            strcpy(temp_punct, cp);
        else
            strcpy(temp_punct, "");
        if (temp_punct[0] == '"') {   /* if quoted string */
            strcpy(temp_punct, temp_punct+1);
            temp_punct[leng-2] = '\0';
        }
        for (cp = temp_punct; *cp; cp++) {
            cp2 = strchr(punctuation, *cp);
            if (cp2)
                strcpy(cp2, cp2+1);  /* remove character from punctuation */
        }
    }
    if (switch_present("max_topics")) {
        /* query assumes topic = wid = count */
        max_topic = max_count = max_wid = max_pos = atoi(switch_value("max_topics"));
        if (max_topic > 9) {
            printf("/MAX_TOPICS specifies the number of digits in the topic number field.\n");
            printf("A 32 bit system cannot handle integers greater than 9 digits.\n");
            exit(9);
        }
    }
    if (switch_present("minimum_word"))
        minimum_word = atoi(switch_value("minimum_word"));

    sequential = (switch_present("sequential"));

    strcpy(file_arg, switch_value("file"));  /* get source */
    file_dsc.dsc$w_length = (short) strlen(file_arg);  /* set the descriptor length */

    strncpy(file_spec, "", sizeof(file_spec));  /* clear out file_spec */

    /* in the case of wildcard file names, lib$find_file will expand them */
    status = lib$find_file(&file_dsc, &file_spec_dsc, &context, 0, 0, 0, 0);
    if ((status & 1) == 0) {
        printf("lib$find_file failed: status %X\n", status);
        exit(11);
    }
    ptr = strchr(file_spec, ' ');
    if (ptr)
        *ptr = '\0';            /* chop off trailing spaces */

    strcpy(out_name, file_spec);    /* make copy for output spec */

    if (switch_present("output"))   /* if /output, overwrite out_name */
        strcpy(out_name, switch_value("output"));

    words_size = words_index = 0;    /* no words yet */
    words = NULL;
    noise_size = noise_index = 0;    /* no noise yet */
    noise = NULL;
    candidate_size = candidate_index = 0;    /* no candidate yet */
    candidate = NULL;
    if (!switch_present("link")) {
        /* load noise words */
        load_words("noise", punctuation, &noise, &noise_size, &noise_index);
        /* load candidate words */
        load_words("candidates", punctuation, &candidate, &candidate_size, &candidate_index);
    }

    /* dynamically allocate space and constants that depend on switches */
    dashes = (char *) malloc(dash_len+1);
    memset((void *) dashes, (type==dash) ? '-' : '=', dash_len);
    dashes[dash_len] = '\0';

    memset((void *) spaces, ' ', DESC_SIZE); /* make spaces for padding topic */
    spaces[DESC_SIZE] = '\0';

    idx_record = (char *) calloc(max_word + max_count + max_topic + max_wid + 1,
                                 sizeof(char));
    idx_key = (char *) calloc(max_word + 1, sizeof(char));

    pos_record = (char *) calloc(max_wid + max_pos + 1, sizeof(char));

    prev_keyword = (char *) calloc(max_word + 1, sizeof(char));

    if (sequential && ((max_topic != 6) || (max_word != 20))) {
        printf("WARNING: Your FDL file will need to reflect the following:\n");
        printf("IDX file record length: %d\n", max_word + max_topic * 3);
        printf("IDX file key 0 size: %d\n", max_word + max_topic);
        printf("SEL file key 0 size: %d\n", max_topic);
        printf("POS file record length: %d\n", max_wid + max_pos);
        printf("POS file key 0 size: %d\n", max_wid);
    }

    /* set up index file */
    if (!switch_present("link")) {
        ptr = strrchr(out_name, '.');  /* just get file name */
        if (ptr) *ptr = '\0';
        if (sequential)
            strcat(out_name, ".seqidx");
        else
            strcat(out_name, ".idx");
    }

    idxfab = cc$rms_fab;
    idxfab.fab$l_alq = 100;
    idxfab.fab$b_bks = 3;
    idxfab.fab$w_deq = 25;
    idxfab.fab$b_fac = FAB$M_PUT | FAB$M_GET | FAB$M_DEL;
    idxfab.fab$l_fna = out_name;
    idxfab.fab$b_fns = strlen(out_name);
    idxfab.fab$l_fop = FAB$M_CBT | FAB$M_DFW;
    idxfab.fab$w_mrs = max_word + max_topic + max_wid + max_count;
    idxfab.fab$b_org = sequential ? FAB$C_SEQ : FAB$C_IDX;
    idxfab.fab$b_rat = FAB$M_CR;
    idxfab.fab$b_rfm = FAB$C_FIX;
    idxfab.fab$b_shr = FAB$M_NIL;
    idxfab.fab$l_xab = (char *) &idxxab;

    idxrab = cc$rms_rab;
    idxrab.rab$l_fab = (struct FAB *) &idxfab;
    idxrab.rab$b_krf = 0;
    idxrab.rab$l_kbf = idx_key;
    idxrab.rab$b_ksz = max_word;
    idxrab.rab$b_rac = sequential ? RAB$C_SEQ : RAB$C_KEY;
    idxrab.rab$l_rbf = idx_record;
    idxrab.rab$w_rsz = max_word + max_topic + max_wid + max_count;
    idxrab.rab$l_ubf = idx_record;
    idxrab.rab$w_usz = max_word + max_topic + max_wid + max_count;
    idxrab.rab$b_mbf = 20;
    idxrab.rab$l_rop = RAB$M_RAH | RAB$M_WBH;

    idxxab = cc$rms_xabkey;
    idxxab.xab$b_dtp = XAB$C_STG;
    idxxab.xab$b_flg = XAB$M_DAT_NCMPR | XAB$M_IDX_NCMPR;
    idxxab.xab$w_pos0 = 0;
    idxxab.xab$b_siz0 = max_word + max_topic;
    idxxab.xab$b_ref = 0;

    if (!switch_present("link")) {
        if (switch_present("add")) {
            if (((status = sys$open(&idxfab)) & 1) != SS$_NORMAL)
                lib$stop(status);
        }
        else {
            if (((status = sys$create(&idxfab)) & 1) != SS$_NORMAL)
                lib$stop(status);    
        }
        if (((status = sys$connect(&idxrab)) & 1) != SS$_NORMAL)
            lib$stop(status);

    /* set up selector file */
        ptr = strrchr(out_name, '.');  /* just get file name */
        if (ptr) *ptr = '\0';
        if (sequential)
            strcat(out_name, ".seqsel");
        else
            strcat(out_name, ".sel");
    }

    selfab = cc$rms_fab;
    selfab.fab$l_alq = 10;
    selfab.fab$b_bks = 3;
    selfab.fab$w_deq = 5;
    selfab.fab$b_fac = FAB$M_PUT;
    selfab.fab$l_fna = out_name;
    selfab.fab$b_fns = strlen(out_name);
    selfab.fab$l_fop = FAB$M_CBT | FAB$M_DFW;
    selfab.fab$w_mrs = max_topic + DESC_SIZE + SELECTOR_SIZE;
    selfab.fab$b_org = sequential ? FAB$C_SEQ : FAB$C_IDX;
    selfab.fab$b_rat = FAB$M_CR;
    selfab.fab$b_rfm = FAB$C_VAR;
    selfab.fab$b_shr = FAB$M_NIL;
    selfab.fab$l_xab = (char *) &selxab;

    selrab = cc$rms_rab;
    selrab.rab$l_fab = (struct FAB *) &selfab;
    selrab.rab$b_rac = sequential ? RAB$C_SEQ : RAB$C_KEY;
    selrab.rab$l_rbf = sel_record;
    selrab.rab$b_mbf = 20;
    selrab.rab$l_rop = RAB$M_RAH | RAB$M_WBH | RAB$M_EOF;

    selxab = cc$rms_xabkey;
    selxab.xab$b_dtp = XAB$C_STG;
    selxab.xab$b_flg = XAB$M_DAT_NCMPR | XAB$M_IDX_NCMPR;
    selxab.xab$w_pos0 = 0;
    selxab.xab$b_siz0 = max_topic;
    selxab.xab$b_ref = 0;

    if (!switch_present("link")) {
        if (switch_present("add")) {
            if (((status = sys$open(&selfab)) & 1) != SS$_NORMAL)
                lib$stop(status);
        }
        else {
            if (((status = sys$create(&selfab)) & 1) != SS$_NORMAL)
                lib$stop(status);    
        }
        if (((status = sys$connect(&selrab)) & 1) != SS$_NORMAL)
            lib$stop(status);

    /* set up position file */
        ptr = strrchr(out_name, '.');  /* just get file name */
        if (ptr) *ptr = '\0';
        if (sequential)
            strcat(out_name, ".seqpos");
        else
            strcat(out_name, ".pos");
    }

    posfab = cc$rms_fab;
    posfab.fab$l_alq = 100;
    posfab.fab$b_bks = 3;
    posfab.fab$w_deq = 25;
    posfab.fab$b_fac = FAB$M_PUT;
    posfab.fab$l_fna = out_name;
    posfab.fab$b_fns = strlen(out_name);
    posfab.fab$l_fop = FAB$M_CBT | FAB$M_DFW;
    posfab.fab$w_mrs = max_wid + max_pos;
    posfab.fab$b_org = sequential ? FAB$C_SEQ : FAB$C_IDX;
    posfab.fab$b_rat = FAB$M_CR;
    posfab.fab$b_rfm = FAB$C_FIX;
    posfab.fab$b_shr = FAB$M_NIL;
    posfab.fab$l_xab = (char *) &posxab;

    posrab = cc$rms_rab;
    posrab.rab$l_fab = (struct FAB *) &posfab;
    posrab.rab$b_rac = sequential ? RAB$C_SEQ : RAB$C_KEY;
    posrab.rab$l_rbf = pos_record;
    posrab.rab$w_rsz = max_wid + max_pos;
    posrab.rab$l_ubf = pos_record;
    posrab.rab$w_usz = max_wid + max_pos;
    posrab.rab$b_mbf = 20;
    posrab.rab$l_rop = RAB$M_RAH | RAB$M_WBH | RAB$M_EOF;

    posxab = cc$rms_xabkey;
    posxab.xab$b_dtp = XAB$C_STG;
    posxab.xab$b_flg = XAB$M_DUP | XAB$M_DAT_NCMPR | XAB$M_IDX_NCMPR;
    posxab.xab$w_pos0 = 0;
    posxab.xab$b_siz0 = max_wid;
    posxab.xab$b_ref = 0;

    selector_spec = (char *) calloc(1, sizeof(char)); /* make empty spec */
    url_spec = (char *) calloc(1, sizeof(char));
    url_fragment = (char *) calloc(1, sizeof(char));

    if (!switch_present("link")) {
        if (switch_present("position")) {
            if (switch_present("add")) {
                if (((status = sys$open(&posfab)) & 1) != SS$_NORMAL)
                    lib$stop(status);
            }
            else {
                if (((status = sys$create(&posfab)) & 1) != SS$_NORMAL)
                    lib$stop(status);    
            }
            if (((status = sys$connect(&posrab)) & 1) != SS$_NORMAL)
                lib$stop(status);
        }

        /* if /helpfile given, set up index values */
        if (switch_present("helpfile.selector")) {
            selector_spec = (char *)
                calloc(strlen(switch_value("helpfile.selector")) + 1,
                       sizeof(char));
            strcpy(selector_spec, switch_value("helpfile.selector"));

            if (switch_present("helpfile.title"))
                strcpy(desc, switch_value("helpfile.title"));
            else
                strcpy(desc, "Help on search commands");
            word_pos = 0;
            strcpy(help_index, "?");
            build_words(help_index, "", 0, &word_pos);  /* add "?" to the index */
            strcpy(help_index, "?help");
            build_words(help_index, "", 0, &word_pos);  /* add "?help" to the index */
	    dummy = NULL;
            write_words((FILE *) 0, dummy, &selrab, &idxrab, &posrab, &start_pos,
                        &end_pos, desc, topics, type, &word_pos,
                        punctuation, minimum_word);  /* write helpfile */
        }
    }
    else {  /* /link */
        if (*out_name != '.') {  /* if the output name has no leading dot */
            ptr = strrchr(out_name, '.');
            if (ptr) *ptr = '\0';      /* replace any file type with .link */
            strcat(out_name, ".link");
        }
        lnk = fopen(out_name, "w", "mbc=50", "mbf=20");
        if (!switch_present("link.sort"))
            fprintf(lnk, "Sortdir=False\n\n");
    }

    if (switch_present("add")) {
        if (idxxab.xab$b_siz0 != (max_word + max_count)) {
            printf("Source and index file /MAX_TOPIC & /WORD_SIZE do not match\n");
            exit(13);
        }
        /* find previous magic cookie in the index file */
        if (switch_present("sequential")) {
            find_eof(&idxrab);
            status = sys$get(&idxrab);
            if ((status & 1) != SS$_NORMAL)
                lib$stop(status);
        }
        else {
            char *record_copy;

            strcpy(idx_key, MAGIC_COOKIE);
            idxrab.rab$l_kbf = idx_key;
            idxrab.rab$b_ksz = strlen(idx_key);
            record_copy = (char *) calloc(max_word + max_count + max_topic + 
                                   max_wid + 1, sizeof(char));
            check_alloc(record_copy);
            for (;;) {
                status = sys$get(&idxrab);
                if (((status & 1) == SS$_NORMAL) &&
                    (strncmp(idx_record, MAGIC_COOKIE, strlen(MAGIC_COOKIE)) == 0))
                    strcpy(record_copy, idx_record);
                else
                    break;
                idxrab.rab$b_rac = RAB$C_SEQ;
            }
            strcpy(idx_record, record_copy);
            free(record_copy);  /* should be cfree, but alpha chokes */
            idxrab.rab$b_rac = RAB$C_KEY;
        }
        if (strncmp(idx_record, MAGIC_COOKIE, strlen(MAGIC_COOKIE))) {
            printf("Not Magic: %s\n", idx_record);
            exit(17);
        }
        strncpy(temp_number, "", sizeof(temp_number));
        strncpy(temp_number, idx_record + max_word, max_count);
        db_index = atoi(temp_number);
        strncpy(temp_number, "", sizeof(temp_number));
        strncpy(temp_number, idx_record + max_word + max_count, max_topic);
        word_index = atoi(temp_number);
    }

    /* if /NOPOSITION, force word index to 0 */
    if (!switch_present("position"))
        word_index = 0;


    for (;;) {  /* process all files in input spec, first one already found */

        if (first_time) {         /* skip the lib$find_file the first time */
            first_time = FALSE;
            status = 1;
        }
        else
            status = lib$find_file(&file_dsc, &file_spec_dsc, &context, 0, 0, 0, 0);

        if (status == RMS$_NMF) {        /* no more files */
            lib$find_file_end(&context);
            if ((ptr = switch_value("file")) == NULL)
                break;  /* no file names left */
            strcpy(file_arg, ptr);  /* get source */
            file_dsc.dsc$w_length = (short) strlen(file_arg);  /* set the descriptor length */

            strncpy(file_spec, "", sizeof(file_spec));  /* clear out file_spec */
            status = lib$find_file(&file_dsc, &file_spec_dsc, &context, 0, 0, 0, 0);
        }
        ptr = strchr(file_spec, ' ');
        if (ptr) *ptr = '\0';            /* chop off trailing spaces */

        if ((status & 1) == 0) {
             printf("lib$find_file error %X on %s\n", status, file_spec);
             continue;
        }

        if (exclude(file_spec))  /* should we exclude this file? */
            continue;            /* yes */

        if ((src = fopen(file_spec, "r", "mbc=50", "mbf=20")) == NULL) { 
            printf("Can't read input file %s\n", file_spec);
            continue;
        }

        if (!switch_present("quiet"))
            printf("Building index for %s\n", file_spec);

        start_pos = ftell(src);          /* init start position */
        strncpy(desc, "", sizeof(desc));
        word_pos = 0;
        
        while (fgets(src_line, sizeof(src_line), src)) {
            /* If we've read too many lines, then break out jms/950422 */
            read_lines++;
            if ( (max_lines) && (read_lines > max_lines) ) {
                if (!switch_present("quiet"))
	            printf(" Finishing early because maximum line count reached\n");
		/* skip all of the lines until the last line, and then
 		 * replace the line we just read with that one.  Continue
		 * on.  The error will get re-echoed when we go back to the
		 * top of the loop, so we don't have to maintain any 
		 * icky state information.  jms/950626 
		 */
		/* strncpy(src_line, skip_to_end_of_file(src), sizeof(src_line)); */
                break;
            }

            /* if the first character of the line is the hex value, end topic */
            if ((src_line[0] == hex_value) && (type == hex)) {
                write_words(src, lnk, &selrab, &idxrab, &posrab, &start_pos,
                            &end_pos, desc, topics, type, &word_pos,
                            punctuation, minimum_word);
                continue;
            }
            ptr = strchr(src_line, '\n');
            if (ptr) *ptr = '\0';              /* remove newline */
            for (ptr = src_line; *ptr; ptr++)
                if (iscntrl(*ptr))  *ptr = ' ';  /* convert tabs to spaces */
            while ((strlen(src_line) > 0) &&
                   (src_line[strlen(src_line)-1] == ' '))
                src_line[strlen(src_line)-1] = '\0';/* remove trailing blanks */
            strcpy(orig_line, src_line);    /* copy before forcing lower case */
            lower(src_line);                /* force lowercase */
            strcpy(lc_line, src_line);      /* copy with leading blanks */
            for (ptr = src_line; *ptr; ptr++)
                if (*ptr > ' ') break;  /* find first non-blank char */
            strcpy(src_line, ptr);      /* remove leading blanks */
        
            /* break on dashes */
            if (((type == equal) || (type == dash)) &&
                (strncmp(orig_line, dashes, dash_len) == 0)) {
                write_words(src, lnk, &selrab, &idxrab, &posrab, &start_pos,
                            &end_pos, desc, topics, type, &word_pos,
                            punctuation, minimum_word);
                continue;
            }
            /* break on paragraph */
            if ((type == para) && (strlen(src_line) == 0)) {
                write_words(src, lnk, &selrab, &idxrab, &posrab, &start_pos,
                            &end_pos, desc, topics, type, &word_pos,
                            punctuation, minimum_word);
                continue;
            }
            /* break on non-empty field */
            if ((type == field) && !is_spaces(orig_line, field_pos, field_size)) {
                write_words(src, lnk, &selrab, &idxrab, &posrab, &start_pos,
                            &end_pos, desc, topics, type, &word_pos,
                            punctuation, minimum_word);
                start_pos = end_pos;   /* don't skip over line with field break */
            }

            /* apply topic rules */
            for (index = 0; topics[index].used; index++) {
                where = topics[index].pos;      /* where text is found */

                if (topics[index].deftext && (strlen(desc) == 0))
                    strncpy(desc, topics[index].deftext, DESC_SIZE);

                if (/* if we have no match or don't want the first match */
                    ((topics[index].found == NULL) ||
                     (strlen(topics[index].found) == 0) ||
                     (topics[index].first == FALSE))
                    &&
                    /* if text matches the source text and position */
                    (((topics[index].pos > 0) && topics[index].text
                     && strncmp(lc_line + topics[index].pos - 1,
                            topics[index].text, strlen(topics[index].text)) == 0)
                    ||  /* or position = 0 and text is found _somewhere_ */
                    ((topics[index].pos == 0)
                     && (where = find_str(lc_line, topics[index].text)))
                    || /* or no text given but position and size field is non-blank */
                    (!topics[index].text &&
                     !is_spaces(orig_line, topics[index].pos, topics[index].size)))
                   ) {
                    /* if topic matches and requested a break, do it */
                    if (topics[index].force_break) {
                        type = force;  /* override other types */
                        write_words(src, lnk, &selrab, &idxrab, &posrab,
                                    &start_pos, &end_pos, desc, topics, type,
                                    &word_pos, punctuation, minimum_word);
                        start_pos = end_pos;   /* don't skip over topic line */
                    }
                    /* make copy of line at start of topic text */
                    if (topics[index].exclude)
                        strcpy(temp_line, orig_line + where - 1 + strlen(topics[index].text));
                    else
                        strcpy(temp_line, orig_line + where - 1);
                    topics[index].found = (char *) my_realloc((char *) topics[index].found,
                                         ((topics[index].size > -1) ? topics[index].size : strlen(orig_line))
                                         + 1);
                    if (topics[index].size > -1) {    /* want fixed topic size */
                        strncpy(topics[index].found, temp_line + topics[index].offset,
                                topics[index].size);
                        topics[index].found[topics[index].size] = '\0';
                        strncat(topics[index].found, spaces,
                                topics[index].size - strlen(topics[index].found));
                    }
                    else {   /* copy to end of topic */
                        strcpy(topics[index].found, temp_line + topics[index].offset);
                        if (where = find_str(topics[index].found, topics[index].end))
                            topics[index].found[where - 1] = '\0';    /* terminate the found string */
                    }
                }
            }

            /* save the first line by default */ 
            if ((switch_present("default_topic")) && (strlen(desc) == 0))
                strncpy(desc, orig_line, DESC_SIZE);

            /* apply selector rules */
            if (switch_present("selector.text") &&
                (where = find_str(lc_line, switch_value("selector.text")))) {
                selector_spec = (char *) my_realloc((char *) selector_spec,
                                         strlen(orig_line) + 1);
                strcpy(selector_spec, orig_line + where - 1 +
                       strlen(switch_value("selector.text")));
                if (switch_present("selector.end") &&  /* if selector=end given */
                    (where = find_str(selector_spec, switch_value("selector.end"))))
                    selector_spec[where - 1] = '\0';  /* mark selector end */
                while (*selector_spec == ' ')  /* remove leading spaces */
                    strcpy(selector_spec, selector_spec + 1);
                continue;                    /* do not index this line */
            }

            /* URL rules */
            if (switch_present("url.text") &&
                (where = find_str(lc_line, switch_value("url.text")))) {
                if (switch_present("url.prefix"))
                    prefix = strlen(switch_value("url.prefix"));
                else
                    prefix = 0;
                url_spec = (char *) my_realloc((char *) url_spec,
                                               prefix + strlen(orig_line) + 1);
                /* prefix url */
                strcpy(url_spec, switch_present("url.prefix") ?
                                 switch_value("url.prefix") : "");
                /* plus url found */
                strcat(url_spec, orig_line + where - 1 +
                       strlen(switch_value("url.text")));
                if (switch_present("url.end") &&  /* if url=end given */
                    (where = find_str(url_spec, switch_value("url.end"))))
                    url_spec[where - 1] = '\0';  /* mark url end */
                while (*url_spec == ' ')  /* remove leading spaces */
                    strcpy(url_spec, url_spec + 1);
                continue;                    /* do not index this line */
            }

            if (switch_present("url.fragment") &&
                (where = find_str(lc_line, switch_value("url.fragment")))) {
                url_fragment = (char *) my_realloc((char *) url_fragment,
                                               strlen(orig_line) + 1);
                /* save fragment name for later */
                strcpy(url_fragment, orig_line + where - 1 +
                       strlen(switch_value("url.fragment")));
                while(strlen(url_fragment))
                    if (!isalpha(*url_fragment))
                        strcpy(url_fragment, url_fragment+1);
                    else
                        break;
                for (ptr = url_fragment; *ptr; ptr++)
                    if (!isalnum(*ptr) && (*ptr != '-')) {
                        *ptr = '\0'; /* fragment name must be alphanum or '-' */
                        break;
                    }
            }


            /* apply keyword rules, index words */
            if (!switch_present("link"))
                test_words(src_line, punctuation, minimum_word, keywords, &word_pos);

            end_pos = ftell(src);  /* end_pos points before any terminator */

            /* force topic break if in line mode */
            if (type == line)
                write_words(src, lnk, &selrab, &idxrab, &posrab, &start_pos,
                            &end_pos, desc, topics, type, &word_pos,
                            punctuation, minimum_word);
        }
        
        /* in case file doesn't end with a terminator */
        write_words(src, lnk, &selrab, &idxrab, &posrab, &start_pos,
                    &end_pos, desc, topics, type, &word_pos,
                    punctuation, minimum_word);
        fclose(src);
    }
    if (switch_present("link"))
        fclose(lnk);
    else {

    /* write out the values of db_index and word_index with a magic cookie */
        strcpy(idx_key, MAGIC_COOKIE);
        idxrab.rab$l_kbf = idx_key;
        idxrab.rab$b_ksz = strlen(idx_key);
        status = sys$get(&idxrab);          /* find old magic cookie */
        if ((status & 1) == SS$_NORMAL) {
            status = sys$delete(&idxrab);   /* and delete it */
            if ((status & 1) != SS$_NORMAL)
                lib$stop(status);
        }
        sprintf(idx_record, "%-*s%0*d%0*d%0*d",
                max_word, MAGIC_COOKIE,
                max_topic, db_index,
                max_wid, word_index,
                max_count, 0);
        idxrab.rab$w_rsz = strlen(idx_record);
        status = sys$put(&idxrab);         /* write new magic cookie */
        if ((status & 1) != SS$_NORMAL)
            lib$stop(status);
        sys$close(&selfab);
        sys$close(&idxfab);
        sys$close(&posfab);
    }
/*
 * don't bother freeing memory, just exit
 *
 *   for (ind = 0; ind < words_size; ind++)
 *       if (words[ind].str) cfree(words[ind].str);
 *   free(words);
 */
}


/*
 * Position the record stream of a fixed-length index file on its last record.
 *
 * Records are probed by number with keyed $FIND calls:
 *   1. step forward 500 records at a time until a probe fails,
 *   2. bisect between the last hit and the first miss,
 *   3. if the final probe missed, walk back one record at a time until a
 *      $FIND succeeds.
 * The RAB is switched to keyed access for the search and restored to
 * sequential access before returning.
 */
void find_eof(struct RAB *idxptr)
{
    int rec_no;             /* key buffer handed to RMS */
    int sts;                /* status of the most recent $FIND */
    int step = 500;
    int lo = 0;             /* highest record number known to exist */
    int hi;                 /* upper bound of the search window */

    idxptr->rab$l_kbf = (char *) &rec_no;
    idxptr->rab$b_ksz = 4;
    idxptr->rab$b_rac = RAB$C_KEY;

    /* coarse pass: advance in fixed strides until we step past the end */
    do {
        rec_no = lo + step;
        sts = sys$find(idxptr);
        if ((sts & 1) == SS$_NORMAL)
            lo += step;
    } while ((sts & 1) == SS$_NORMAL);

    /* fine pass: halve the window until it cannot be split any further */
    hi = lo + step;
    while ((step = (hi - lo) / 2) != 0) {
        rec_no = lo + step;
        sts = sys$find(idxptr);
        if ((sts & 1) == SS$_NORMAL)
            lo += step;
        else
            hi -= step;
    }

    /* if we're past EOF, back up to the last record */
    while ((sts & 1) != SS$_NORMAL) {
        rec_no--;
        sts = sys$find(idxptr);
    }

    idxptr->rab$b_rac = RAB$C_SEQ;
}


/*
 * Index the parts of a source line selected by the /keyword rules.
 *
 * line         lower-cased source line; left untouched when /keyword is
 *              present, modified by build_words() when it is not
 * punct        punctuation characters handed on to build_words()
 * minimum_word minimum word length handed on to build_words()
 * keywords     keyword rule table, terminated by an entry with used == 0
 * word_pos     running word position, advanced by build_words()
 *
 * Without /keyword the whole line is indexed.  Otherwise the file-level
 * keyword_index remembers which keyword rule is currently "open" (-1 when
 * none): text is indexed from a keyword's start text up to its end text,
 * and a rule without end text closes at the end of the line.
 */
void test_words(char *line, char *punct, int minimum_word, topic_str *keywords, int *word_pos)
{
    char test_line[MAX_INPUT_LINE], copy_line[MAX_INPUT_LINE];
    char *rest;
    int ind, where, retry;

    if (!switch_present("keyword")) {  /* no /keyword= */
        build_words(line, punct, minimum_word, word_pos);  /* index everything */
        return;
    }

    if ((keyword_index > -1)
        && ((keywords[keyword_index].end == NULL)
            || (strlen(keywords[keyword_index].end) == 0)))
        keyword_index = -1;  /* keyword indexing stops at EOL unless /keyword=end */

    strcpy(test_line, line);        /* copy source line */
    do {
        where = 0;
        retry = FALSE;
        if (keyword_index == -1) {      /* between keywords */
            for (ind = 0; keywords[ind].used; ind++)
                if ((where = find_str(test_line, keywords[ind].text)) != 0)
                    break;
            if (where) {
                keyword_index = ind;           /* record current keyword */
                /* source and destination overlap, so memmove, not strcpy */
                rest = test_line + where - 1 + keywords[ind].offset;
                memmove(test_line, rest, strlen(rest) + 1);  /* remove up to keyword */
                if (keywords[ind].exclude) {
                    rest = test_line + strlen(keywords[ind].text);
                    memmove(test_line, rest, strlen(rest) + 1);
                }
            }
        }
        if (keyword_index > -1) {     /* in keyword index */
            if ((where = find_str(test_line, keywords[keyword_index].end)) != 0) {
                strcpy(copy_line, test_line);
                test_line[where - 1] = '\0';
                /* index contents of line */
                build_words(test_line, punct, minimum_word, word_pos);
                strcpy(test_line, copy_line + where - 1);  /* restart at end word */
                keyword_index = -1;   /* no longer indexing */
                retry = TRUE;        /* check for another keyword */
            }
            else {  /* indexing and no end word found */
                /* index contents of line */
                build_words(test_line, punct, minimum_word, word_pos);
                return;
            }
        }
    } while (retry);
}


/*
 * Break a line into words and save them in words[].
 *
 * line         text to index; modified in place (punctuation becomes
 *              spaces, spaces become string terminators)
 * punct        characters to treat as punctuation
 * minimum_word minimum word length, passed to is_noise()
 * pos          running word position; incremented for every word stored
 *
 * A word is stored when it is non-empty and either no candidate list is
 * loaded and the word is not noise, or a candidate list is loaded and the
 * word is on it.  Stored words are cut to max_word characters.
 *
 * The line is tokenised where it stands: nothing is appended to it, so the
 * caller's buffer is never written beyond its existing terminator.
 */

void build_words(char *line, char *punct, int minimum_word, int *pos)
{
    char *cp, *cp2;
    size_t len;

    for (cp = line; *cp; cp++)      /* convert punctuation to spaces */
        if (is_punct(*cp, punct)) *cp = ' ';

    cp = line;
    for (;;) {
        cp2 = strchr(cp, ' ');      /* break at space boundary */
        if (cp2) *cp2 = '\0';
        len = strlen(cp);
        if (len > (size_t) max_word)
            if (!switch_present("quiet"))
                printf("Truncating %d character word (%s) to %d characters\n",
                       (int) len, cp, max_word);
        if ((len > 0) &&
            (((candidate_size == 0) &&
             !is_noise(cp, noise_size, minimum_word)) ||
            (candidate_size && is_candidate(cp, candidate_size)))) {
            if (words_index == words_size)  /* table full */
                expand_table(&words, &words_size);
            strncpy(words[words_index].str, cp, max_word);
            words[words_index++].pos = ++(*pos);
        }
        if (cp2 == NULL)            /* that was the last word on the line */
            break;
        cp = cp2 + 1;
    }
}


/*
 * Grow *table by CHUNK entries and bump *size to match.
 * Every new entry gets a zero-filled string buffer of max_word + 1 bytes
 * and a position of -1.
 */

void expand_table(table_str **table, int *size)
{
    int old_count = *size;
    table_str *entry, *stop;

    *table = (table_str *) my_realloc((table_str *) *table,
                                      (old_count + CHUNK) * sizeof(table_str));

    stop = *table + old_count + CHUNK;
    for (entry = *table + old_count; entry < stop; entry++) {
        entry->str = (char *) calloc(max_word + 1, sizeof(char));
        check_alloc(entry->str);
        entry->pos = -1;
    }

    *size = old_count + CHUNK;
}


/*
 * Return base raised to the exp power.
 * An exponent of zero or less yields 1 (the empty product); integer
 * overflow for large results is not detected.
 */
int power(int base, int exp)
{
    int result = 1;

    while (exp-- > 0)
        result *= base;
    return result;
}


/* qsort compare routine */
int compare (const void *str1, const void *str2)
{
    return (strcmp((*(table_str *) str1).str, (*(table_str *) str2).str));
}


/* write out the index entries */
write_index(struct RAB *idxptr, struct RAB *posptr)
{
    int ind, inc, status, dup_count;

    /* write out the words */
    /* sort keys for counts and $put performance */
    qsort(words, words_index, sizeof(table_str), compare);
    dup_count = 1;
    strcpy(prev_keyword, words[0].str);
    for (ind = 1; ind < words_index; ind++) {
        if (strcmp(words[ind].str, prev_keyword) == 0) {
            dup_count++;
            inc = 0;
        }
        else {
            sprintf(idx_record, "%-*s%0*d%0*d%0*d",
                max_word, prev_keyword,
                max_topic, db_index,
                max_wid, word_index,
                max_count, dup_count);
            status = sys$put(idxptr);
            if ((status & 1) != SS$_NORMAL)
                lib$stop(status);
            strcpy(prev_keyword, words[ind].str);
            dup_count = 1;
            inc = 1;
        }
        if (switch_present("position")) {
            sprintf(pos_record, "%0*d%0*d",
                max_wid, word_index,
                max_pos, words[ind-1].pos);
            status = sys$put(posptr);
            if ((status & 1) != SS$_NORMAL)
                lib$stop(status);
            word_index += inc;
        }
    }
    /* write out the last word */
    sprintf(idx_record, "%-*s%0*d%0*d%0*d",
            max_word, prev_keyword,
            max_topic, db_index,
            max_wid, word_index,
            max_count, dup_count);
    status = sys$put(idxptr);
    if ((status & 1) != SS$_NORMAL)
        lib$stop(status);
    if (switch_present("position")) {
        sprintf(pos_record, "%0*d%0*d",
            max_wid, word_index,
            max_pos, words[ind-1].pos);
        status = sys$put(posptr);
        if ((status & 1) != SS$_NORMAL)
            lib$stop(status);
        word_index++;
    }
}


/*
 * Reset the per-topic context so the next topic starts clean.
 *
 * src        source file; its current offset becomes the new *start_pos
 *            (left unchanged when src is NULL)
 * desc       topic description buffer, emptied
 * start_pos  receives the start offset of the next topic
 * topics     topic rule table; every "found" string is emptied
 * word_pos   running word position, reset to 0
 *
 * Also empties the used part of words[] and resets words_index.
 */
void reset_topic(FILE *src, char *desc, int *start_pos,
                 topic_str *topics, int *word_pos)
{
    int ind;

    /* desc is a pointer here, so sizeof(desc) is not the buffer size;
       an empty string only needs its first byte cleared */
    desc[0] = '\0';
    if (src != NULL)
        *start_pos = ftell(src);      /* init start position */
    for (ind = 0; ind < words_index; ind++)  /* clear out words[] */
        *words[ind].str = '\0';
    words_index = 0;
    *word_pos = 0;
    for (ind = 0; ind < TOPIC_SIZE; ind++)  /* clear out topics[] */
        if (topics[ind].found)
            *topics[ind].found = '\0';
}


/*
 * Check that a selector record built from desc and spec, plus size extra
 * bytes and the max_topic-wide topic number, fits in sel_record.
 * Returns 1 when it fits; otherwise reports the selector and returns 0.
 */
int test_size(char *desc, char *spec, int size)
{
    size_t needed = strlen(desc) + strlen(spec) + size + max_topic;

    if (needed <= sizeof(sel_record) - 1)
        return 1;

    printf("Selector is too large: %s\n", spec);
    return 0;
}


/*
 * Take the URL from url_spec and append it to the selector record as
 * "|host|method|path".
 *
 * rec       selector record being built; extended in place when the
 *           result passes test_size()
 * filename  source file specification; the part after ']' (or the whole
 *           name when there is no ']') is merged into the path when the
 *           URL equals the url.default value and ends with '/'
 *
 * Every piece is clipped to its local buffer, so an over-long URL is
 * truncated rather than overrunning the stack.
 * note: doesn't handle duplication of host, port between URL and selector
 */
void add_url(char *rec, char *filename)
{
    char method[100], host[200], path[300];
    /* large enough for all three parts plus the three '|' and the NUL */
    char new[sizeof(method) + sizeof(host) + sizeof(path) + 4];
    char *cp, *cp2 = NULL, *ptr, *name;
    size_t len;

    method[0] = '\0';
    host[0] = '\0';
    path[0] = '\0';

    /* parse the URL */
    if ((cp = strstr(url_spec, "://")) != NULL) {
        len = (size_t) (cp - url_spec);
        if (len > sizeof(method) - 1)
            len = sizeof(method) - 1;
        memcpy(method, url_spec, len);
        method[len] = '\0';
    }
    if ((cp = strstr(url_spec, "//")) != NULL) {
        cp += 2;                        /* start of host */
        cp2 = strchr(cp, '/');          /* start of path, if any */
        len = cp2 ? (size_t) (cp2 - cp) : strlen(cp);
        if (len > sizeof(host) - 1)
            len = sizeof(host) - 1;
        memcpy(host, cp, len);
        host[len] = '\0';
    }
    /* NOTE(review): with no '/' after the host the whole URL becomes the
       path, as in the original code -- confirm this is intended */
    if (cp2 == NULL)
        cp2 = url_spec;
    strncat(path, cp2, sizeof(path) - 1);

    /* if a partial selector = URL default, merge in real file name */
    ptr = switch_value("url.default");
    len = strlen(url_spec);
    if (ptr && (strcmp(url_spec, ptr) == 0) &&
        (len > 0) && (url_spec[len - 1] == '/')) {
        name = strchr(filename, ']');
        name = name ? name + 1 : filename;
        strncat(path, name, sizeof(path) - 1 - strlen(path));
    }
    if (strlen(url_fragment)) {
        strncat(path, "#", sizeof(path) - 1 - strlen(path));
        strncat(path, url_fragment, sizeof(path) - 1 - strlen(path));
    }
    if (strlen(path)) {
        sprintf(new, "|%s|%s|%s", host, method, path);
        if (test_size(rec, new, 10))
            strcat(rec, new);
    }
}


/* write out file selector then write out words */
/*
 * Called by the line loop at every topic boundary and once more at end of
 * file.  Builds the topic description, then emits either a .link entry
 * (when /link is present) or a selector record followed by the index
 * records for the words collected so far, and finally resets the per-topic
 * state through reset_topic().
 *
 * src           source file; supplies the file name and, via reset_topic(),
 *               the start offset of the next topic
 * lnk           .link output stream, written only when /link is present
 * selptr        RAB used to $PUT the selector record (sel_record)
 * idxptr/posptr RABs handed to write_index()
 * start_pos/end_pos  byte range of the topic, written into "R<start>-<end>"
 *               range selectors
 * desc          topic description; replaced when the topic rules yield text
 * topics        topic rule table (literal / found / file parts)
 * type          separator type; only compared against `whole` here
 * word_pos      running word position, checked against the size limit
 * punct, minimum_word  passed on to build_words()
 */
void write_words(FILE *src, FILE *lnk, struct RAB *selptr, struct RAB *idxptr,
                 struct RAB *posptr, int *start_pos, int *end_pos, char *desc,
                 topic_str *topics, sep_type type, int *word_pos, char *punct,
                 int minimum_word)
{
    int ind, status, new_desc;
    char filename[256], *ptr, temp_desc[512] = "", end_char;
    char hostname[256], portname[10], ptype[25], path[SELECTOR_SIZE];

    keyword_index = -1;             /* stop indexing at end of section */
    if (!switch_present("link")) {

/* may only want to do topic.literal + selector */
/* if the topic is really empty, the test for an empty description will fail */
#if 0
        if (words_index == 0) {
            reset_topic(src, desc, start_pos, topics, word_pos);
            return;      /* no words to write */
        }
#endif
        /* NOTE(review): both limits below are derived from max_topic, while
           write_index() formats word_index with max_wid and word positions
           with max_pos -- confirm which field widths were intended */
        if ((word_index + 2) >= power(10, max_topic)) {
            printf("You have reached %d words in this index\n", word_index);
            printf("Please re-index with /MAX_TOPIC larger than %d ", max_topic);
            printf("or use /NOPOSITION\n");
            exit(5);
        }
        if ((*word_pos + 2) >= power(10, max_topic)) {
            printf("You have reached %d words in this document\n", *word_pos);
            printf("Please re-index with /MAX_TOPIC larger than %d ", max_topic);
            printf("or use /NOPOSITION\n");
            exit(5);
        }
    }
    /* NOTE(review): filename stays uninitialized when src is NULL, yet it is
       read unconditionally below -- confirm src can never be NULL here */
    if (src != NULL)
        fgetname(src, filename);

    /* if /noversion, get rid of the version number */
    if (!switch_present("version")) {
        ptr = strchr(filename, ';');
        if (ptr) *ptr = '\0';
    }
    lower(filename);         /* force filename lowercase */
    new_desc = 0;
    /* build topic description */
    /* NOTE(review): the strcat calls below are not bounded by the 512-byte
       size of temp_desc */
    for (ind = 0; ind < TOPIC_SIZE; ind++) {
        if (topics[ind].literal && (strlen(topics[ind].literal) > 0)) {
            strcat(temp_desc, topics[ind].literal);
            new_desc = 1;
        }
        if (topics[ind].found && (strlen(topics[ind].found) > 0)) {
            /* literals can handle their own spacing */
            if ((strlen(temp_desc) > 0) && (topics[ind].literal == NULL))
                strcat(temp_desc, " ");
            strcat(temp_desc, topics[ind].found);
        }
        /* NOTE(review): strchr(filename, ']')+1 assumes the file
           specification always contains a ']' */
        if (topics[ind].file == 1) {   /* just name.type */
            strcat(temp_desc, strchr(filename, ']')+1);
        }
        if (topics[ind].file == 2) {   /* full file spec */
            strcat(temp_desc, filename);
        }
    }
    if (strlen(temp_desc) > 0)
        strncpy(desc, temp_desc, DESC_SIZE);
    if (new_desc)  /* we have added new words via topic.literal */
        build_words(temp_desc, punct, minimum_word, word_pos);  /* index them */

    if (words_index == 0) {
        reset_topic(src, desc, start_pos, topics, word_pos);
        return;      /* no words to write */
    }

    /* if no selector found, copy in default selector */
    if ((strlen(selector_spec) == 0) && switch_present("selector.default")) {
        selector_spec = (char *) my_realloc((char *) selector_spec,
                        strlen(switch_value("selector.default")) + 1);
        strcpy(selector_spec, switch_value("selector.default"));
    }
    /* ditto for URLs */
    if ((strlen(url_spec) == 0) && switch_present("url.default")) {
        url_spec = (char *) my_realloc((char *) url_spec,
                        strlen(switch_value("url.default")) + 1);
        strcpy(url_spec, switch_value("url.default"));
    }

    /* drop the topic without writing anything when it cannot be described
       or when a required selector/URL was not found */
    if ((strlen(desc) == 0) ||               /* if no description or */
        ((switch_present("selector.text") &&
         !switch_present("selector.both") && /* selectors only */
         (strlen(selector_spec) == 0))) ||   /* and no selector found or */
        ((switch_present("url.text") &&
         !switch_present("url.both") &&      /* urls only */
         (strlen(url_spec) == 0)))) {        /* and no url found */
        reset_topic(src, desc, start_pos, topics, word_pos); /* reset topic stuff */
        return;                              /* and quit */
    }

    strncpy(hostname, "", sizeof(hostname));  /* init hostname */
    strncpy(portname, "", sizeof(portname));  /* init portname */

    /* we have a selector that applies to this article */
    if (strlen(selector_spec)) {
        /* the first selector character is the type; types listed in
           /selector=ignore are skipped */
        if (switch_present("selector.ignore") && /* should we ignore this? */
            (strchr(switch_value("selector.ignore"), *selector_spec))) {
                reset_topic(src, desc, start_pos, topics, word_pos);
                return;                          /* yes, ignore this */
        }
        /* parse out the host and port from the selector */
        /* layout handled here: <selector>|<host>|<port> */
        /* NOTE(review): neither sprintf checks the text against the sizes of
           hostname (256) and portname (10) */
        if (ptr = strchr(selector_spec, '|')) {
            *ptr = '\0';                /* mark off selector from host */
            sprintf(hostname, "%s", ptr + 1);  /* copy host/port */
            if (ptr = strchr(hostname, '|')) {
                *ptr = '\0';            /* mark off port from host */
                sprintf(portname, "%s", ptr + 1);
            }
        }

        /* /selector and /link */
        if (switch_present("link")) {
            fprintf(lnk, "Name=%s\nType=%c\n", desc, *selector_spec);
            fprintf(lnk, "Path=%s\n", selector_spec+1);
            /* NOTE(review): hostname + 1 and portname + 1 skip the first
               character of each string -- confirm that is intended */
            fprintf(lnk, "Host=%s\nPort=%s\n\n",
                    strlen(hostname) ? hostname + 1 : "+",
                    strlen(portname) ? portname + 1 : "+");
        }
        else
            if (test_size(desc, selector_spec, strlen(hostname) +
                          strlen(portname) + 1)) {
                /* if a partial selector, merge in real file name */
                /* except if it is a gopher directory (gtype "1") */
                /* NOTE(review): the strcat relies on selector_spec having
                   room for the file name; its allocation above is sized for
                   the selector text only */
                end_char = selector_spec[strlen(selector_spec)-1];
                if (end_char == ']' && *selector_spec != '1' )
                    strcat(selector_spec, strchr(filename, ']')+1);
                /* this store is overwritten by the strncpy calls below */
                ptype[0] = *(selector_spec+1);
                /* break selector into ptype and path */
                if (*(selector_spec+1) != 'R')
                    if (*(selector_spec+1) == '\0')
                        ptr = selector_spec+1;  /* null ptype = null path */
                    else
                        ptr = selector_spec+2;
                else {
                    /* range selector "R<start>-<end>-<path>": cut at the
                       second dash; assumes both dashes are present */
                    ptr = strchr(selector_spec+2, '-');
                    ptr = strchr(ptr+1, '-') + 1;
                    *(ptr-1) = '\0';
                }
                /* NOTE(review): the count ptr-selector_spec+1 is larger than
                   the distance from selector_spec+1 to ptr, so for a
                   one-character ptype the copy can run into the path text;
                   it is also not checked against sizeof(ptype) -- confirm */
                strncpy(ptype, "", sizeof(ptype)); 
                strncpy(ptype, selector_spec+1, ptr-selector_spec+1);
                strcpy(path, ptr);
                /* record: <topic number><desc> TAB |type|ptype|path|host[:port] */
                sprintf(sel_record, "%0*d%s\t|%c|%s|%s|%s%s%s",
                    max_topic, ++db_index, desc,
                    *selector_spec, ptype, path,
                    hostname, strlen(portname) ? ":" : "", portname);
                if (strlen(url_spec))
                    add_url(sel_record, filename);  /* add URL to selector */
                selptr->rab$w_rsz = strlen(sel_record);
                if (((status = sys$put(selptr)) & 1) != SS$_NORMAL)
                    lib$stop(status);
                write_index(idxptr, posptr);
            }
    }

    /* no selector or we want to include the default selector too */
    if ((strlen(selector_spec) == 0) || switch_present("selector.both")) {
        /* handle /link */
        if (switch_present("link")) {
            fprintf(lnk, "Name=%s\nType=0\n", desc);
            if (type == whole) /* whole file is a special case */
                fprintf(lnk, "Path=0%s\n", filename);
            else
                fprintf(lnk, "Path=R%d-%d-%s\n", *start_pos, *end_pos, filename);
            fprintf(lnk, "Port=+\nHost=+\n\n");
        }
        else {
            /* write out the selector */
            if (test_size(desc, filename, 20)) {
                if (type == whole) /* whole file is a special case */
                    sprintf(sel_record, "%0*d%s\t|0|0|%s",
                        max_topic, ++db_index, desc, filename);
                else
                    sprintf(sel_record, "%0*d%s\t|0|R%d-%d|%s",
                        max_topic, ++db_index, desc, *start_pos, *end_pos,
                        filename);
                if (strlen(url_spec))
                    add_url(sel_record, filename);  /* add URL to selector */
                selptr->rab$w_rsz = strlen(sel_record);
                if (((status = sys$put(selptr)) & 1) != SS$_NORMAL)
                    lib$stop(status);
                write_index(idxptr, posptr);
            }
        }
    }
    strcpy(selector_spec, "");  /* reset the current selector */
    strcpy(url_spec, "");
    strcpy(url_fragment, "");
    if (!switch_present("quiet"))
        printf("%s\n", desc);
    /* clear words[], topics, etc. */
    reset_topic(src, desc, start_pos, topics, word_pos);
}


/*
 * Read a file of words into a word table.
 *
 * name         CLI switch whose value is the file to read; nothing is done
 *              when the switch is absent
 * punct        punctuation characters, treated as word separators
 * table        table to fill; grown through expand_table() as needed
 * table_size   allocated entries in *table
 * table_index  used entries in *table, advanced for every word stored
 *
 * Each line is lower-cased, punctuation and control characters become
 * spaces, and every space-separated word is stored.  Words are clipped to
 * max_word characters, the capacity expand_table() gives each entry.
 * An unreadable file is reported and otherwise ignored.
 */

void load_words(char *name, char *punct, table_str **table, int *table_size, int *table_index)
{
    FILE *nf;
    char *cp, *cp2, *entry, line[MAX_INPUT_LINE];
    char file_name[256];
    size_t len;

    if (!switch_present(name))
        return;

    file_name[0] = '\0';
    strncat(file_name, switch_value(name), sizeof(file_name) - 1);
    if ((nf = fopen(file_name, "r")) == NULL) {
        printf("Can't read data file %s\n", file_name);
        return;
    }

    while (fgets(line, sizeof(line), nf)) {
        cp = strchr(line, '\n');
        if (cp) *cp = '\0';               /* remove newline */
        for (cp = line; *cp; cp++) {
            /* ctype functions need an unsigned char value */
            if (is_punct(*cp, punct) || iscntrl((unsigned char) *cp))
                *cp = ' ';                /* convert punctuation, tabs to spaces */
            *cp = tolower((unsigned char) *cp);  /* force lowercase */
        }
        len = strlen(line);
        while ((len > 0) && (line[len - 1] == ' '))
            line[--len] = '\0';           /* remove trailing blanks */
        for (cp = line; *cp; cp++)
            if (*cp > ' ') break;         /* find first non-blank char */
        /* regions overlap, so memmove rather than strcpy */
        memmove(line, cp, strlen(cp) + 1);  /* remove leading blanks */

        /* split in place; nothing is appended to the line buffer */
        cp = line;
        for (;;) {
            cp2 = strchr(cp, ' ');        /* break at space boundary */
            if (cp2) *cp2 = '\0';
            if (strlen(cp) > 0) {
                if (*table_index == *table_size)  /* table full */
                    expand_table(table, table_size);
                entry = (*table)[(*table_index)++].str;
                strncpy(entry, cp, max_word);
                entry[max_word] = '\0';   /* entries hold max_word + 1 bytes */
            }
            if (cp2 == NULL)              /* last word on this line */
                break;
            cp = cp2 + 1;
        }
    }

    fclose(nf);
}


/* return TRUE when ch occurs in the punctuation string, FALSE otherwise */

int is_punct(char ch, char *punct)
{
    while (*punct != '\0') {
        if (*punct++ == ch)
            return TRUE;
    }
    return FALSE;
}


/*
 * See if a field of a line is blank.
 *
 * line  text to examine
 * pos   1-based column where the field starts
 * size  field width in columns
 *
 * Returns TRUE when the line is shorter than pos (or pos is negative) or
 * when every examined column holds whitespace; FALSE at the first
 * non-whitespace character.  The string terminator is not whitespace, so
 * the scan never runs past the end of the line.
 * For pos == 0 the nonexistent column 0 is skipped instead of reading the
 * byte in front of the buffer.
 */

int is_spaces(char *line, int pos, int size)
{
    int index;

    if ((pos < 0) || (strlen(line) < (size_t) pos))
        return TRUE;
    for (index = (pos == 0) ? 1 : 0; index < size; index++)
        /* isspace() needs an unsigned char value */
        if (!isspace((unsigned char) line[pos + index - 1]))
            return FALSE;
    return TRUE;
}


/*
 * Decide whether a word should be left out of the index.
 * A word is noise when it is shorter than minimum_word, when it starts
 * with a digit and the "numbers" switch is absent, or when it appears in
 * the first `size` entries of noise[] (an entry with a NULL string ends
 * the list early).
 */

int is_noise(char *word, int size, int minimum_word)
{
    int ind;

    /* simple heuristic saves lots of noise entries */
    if (strlen(word) < minimum_word)
        return TRUE;

    if ((!switch_present("numbers")) && isdigit(*word))
        return TRUE;

    for (ind = 0; (ind < size) && (noise[ind].str != NULL); ind++) {
        if (strcmp(noise[ind].str, word) == 0)
            return TRUE;
    }
    return FALSE;
}


/*
 * Return TRUE when word appears in the first `size` entries of
 * candidate[]; an entry with a NULL string ends the list early.
 */

int is_candidate(char *word, int size)
{
    int ind = 0;

    while (ind < size) {
        char *entry = candidate[ind++].str;

        if (entry == NULL)
            break;
        if (strcmp(entry, word) == 0)
            return TRUE;
    }
    return FALSE;
}


/*
 * See if we should exclude this file.
 *
 * The file specification is split with SYS$FILESCAN into device,
 * directory, name, type and version.  Every value of the "exclude" switch
 * is split the same way, with "*" standing in for any part it omits, and
 * the file is excluded when all five parts match one value under
 * STR$MATCH_WILD.  Returns TRUE to exclude the file, FALSE otherwise.
 *
 * Component buffers are 256 bytes and every copy is clipped to that size,
 * so long directory specifications cannot overrun them.
 */

int exclude(char *file)
{
    char *ptr, excl[256], file_tab[6][256], excl_tab[6][256];
    int status, ind;
    size_t len;
    struct fscndef file_list[6], excl_list[6],
                   scan_list[6] = {{(short) 0, (short) FSCN$_DEVICE, (long) 0},
                                   {(short) 0, (short) FSCN$_DIRECTORY, (long) 0},
                                   {(short) 0, (short) FSCN$_NAME, (long) 0},
                                   {(short) 0, (short) FSCN$_TYPE, (long) 0},
                                   {(short) 0, (short) FSCN$_VERSION, (long) 0},
                                   {(short) 0, (short) 0, (long) 0}};

    make_valid("exclude");
    memcpy(file_list, scan_list, sizeof(scan_list));

    if (((status = sys$filescan(descr(file), file_list, 0)) & 1) != SS$_NORMAL)
        lib$stop(status);
    for (ind = 0; ind < 5; ind++) {
        len = file_list[ind].fscn$w_length;
        if (len > sizeof(file_tab[ind]) - 1)
            len = sizeof(file_tab[ind]) - 1;
        if (len > 0)
            memcpy(file_tab[ind], (char *) file_list[ind].fscn$l_addr, len);
        file_tab[ind][len] = '\0';
    }
    while ((ptr = switch_value("exclude")) != NULL) {
        excl[0] = '\0';
        strncat(excl, ptr, sizeof(excl) - 1);
        memcpy(excl_list, scan_list, sizeof(scan_list));
        if (((status = sys$filescan(descr(excl), excl_list, 0)) & 1) != SS$_NORMAL)
            lib$stop(status);
        for (ind = 0; ind < 5; ind++) {
            if (excl_list[ind].fscn$w_length == 0)
                strcpy(excl_tab[ind], "*");   /* omitted part matches anything */
            else {
                len = excl_list[ind].fscn$w_length;
                if (len > sizeof(excl_tab[ind]) - 1)
                    len = sizeof(excl_tab[ind]) - 1;
                memcpy(excl_tab[ind], (char *) excl_list[ind].fscn$l_addr, len);
                excl_tab[ind][len] = '\0';
            }
            if (str$match_wild(descr(file_tab[ind]), descr(excl_tab[ind])) ==
                STR$_NOMATCH)
                break;
        }
        if (ind == 5)
            return TRUE;  /* all fields match, exclude file */
    }
    return FALSE;
}


/*
 * make a temp lowercase copy of a string
 *
 * str - NUL-terminated string to copy
 *
 * Returns a pointer to one of N_STRING static buffers, used in rotation,
 * so a result is overwritten by the N_STRING-th following call.  Each
 * buffer holds MAX_INPUT_LINE bytes; a longer input is truncated to
 * MAX_INPUT_LINE - 1 characters rather than overrunning the buffer.
 */
char *lc(char *str)
{
# define N_STRING 4
    static char strings[N_STRING][MAX_INPUT_LINE];
    static int cur_string = -1;
    char *cp, *limit;

    if (++cur_string >= N_STRING) cur_string = 0;
    cp = strings[cur_string];
    limit = cp + MAX_INPUT_LINE - 1;      /* leave room for the terminator */
    while (*str && (cp < limit))
        /* ctype functions need an unsigned char value */
        *cp++ = tolower((unsigned char) *str++);
    *cp = '\0';
    return strings[cur_string];
}


/*
 * change a string to lower case
 *
 * str - NUL-terminated string converted in place; NULL is accepted and
 *       ignored.
 */
void lower(char *str)
{
    char *cp;

    if (str)
        for (cp = str; *cp; cp++)
            /* ctype functions need an unsigned char value */
            *cp = tolower((unsigned char) *cp);
}


/*
 * find where the string starts (origin 1) in record
 *
 * record - text to search
 * str    - text to look for; may be NULL
 *
 * Both arguments are lowercased through lc() before the search, so the
 * comparison ignores case.  Returns the 1-based offset of the first match,
 * or 0 when str is NULL, empty, or not present in record.
 */
int find_str(char *record, char *str)
{
    char *haystack, *needle, *hit;

    if (str == NULL || *str == '\0')
        return 0;  /* zero means string not found */

    haystack = lc(record);
    needle = lc(str);
    hit = strstr(haystack, needle);
    if (hit != NULL)
        return (hit - haystack + 1);
    return 0;
}


/* descr() creates character descriptor and return the address
of the descriptor to the caller. */
# define N_DESCR 10
static struct dsc$descriptor_s str_desc[N_DESCR];
static int cur_descr = -1;

struct dsc$descriptor_s *descr(char *string)
{
    if(++cur_descr >= N_DESCR) cur_descr = 0;
    str_desc[cur_descr].dsc$w_length=(short)strlen(string);      
    str_desc[cur_descr].dsc$b_dtype=DSC$K_DTYPE_T;   
    str_desc[cur_descr].dsc$b_class=DSC$K_CLASS_S;  
    str_desc[cur_descr].dsc$a_pointer=string;     
    return &str_desc[cur_descr];
}


/*
 * read the decimal value of a "keyword=value" or "keyword:value" element
 *
 * ptr - points at the keyword; its first character is always skipped
 *
 * Scans forward to the '=' or ':' separator, skips whitespace after it and
 * converts the digits that follow.  Returns the value, limited to a
 * maximum of 256.  Returns 0 when the string ends before a separator is
 * found (instead of scanning past the end of the string).
 */
int get_decimal(char *ptr)
{
    int value;

    do {
        if (*ptr == '\0')      /* no separator: nothing to convert */
            return 0;
        ptr++;
    } while ((*ptr != '=')     /* skip to the keyword/parameter */
        && (*ptr != ':'));     /* separator character */
    /* skip spaces; ctype functions need an unsigned char value */
    while (isspace((unsigned char) *++ptr));
    value = atoi(ptr);
    return (value < 256) ? value : 256;
}


/*
 * extract the text value of a "keyword=value" or "keyword:value" element
 *
 * dest - receives a newly calloc'ed, NUL-terminated copy of the value
 * ptr  - points at the keyword; its first character is always skipped
 *
 * The value is either a quoted string (a doubled quote inside it stands
 * for one quote character) or an unquoted run ending at a space, ',', '/',
 * ')' or the end of the line.  Doubled quotes are collapsed to a single
 * quote in the copy.
 *
 * Returns the position where scanning stopped: the closing quote of a
 * quoted value, or the terminating character of an unquoted one.  When
 * the line ends before a separator is found, *dest is an empty string and
 * the returned pointer addresses the terminating NUL.
 */
char *get_text(char **dest, char *ptr)
{
    char *start, *cp;

    do {
        if (*ptr == '\0')        /* never run past the end of the line */
            break;
        ptr++;
    } while ((*ptr != '=')       /* skip to the keyword/parameter */
           && (*ptr != ':'));    /* separator character */
    if (*ptr)                    /* only step over a real separator */
        while (isspace((unsigned char) *++ptr));  /* skip spaces */
    if (*ptr == '"') {           /* if quoted string */
        start = ++ptr;           /* skip over quote */
        for (; *ptr; ptr++) {    /* skip to ending quote */
            if ((*ptr == '"') && (*(ptr+1) == '"')) {  /* doubled quotes? */
                ptr++;           /* yes, skip it */
                continue;
            }
            if (*ptr == '"')     /* un-doubled quote? */
                break;           /* yes, stop here */
        }
    }
    else {                       /* else non-quoted string */
        start = ptr;             /* start of string */
        while (*ptr
               && (*ptr != ' ')
               && (*ptr != ',')
               && (*ptr != '/')
               && (*ptr != ')'))
            ptr++;               /* skip to string terminator */
    }
    *dest = (char *) calloc((ptr - start) + 1, sizeof(char));
    check_alloc(*dest);
    strncpy(*dest, start, ptr - start);  /* calloc left the terminator */
    for (cp = *dest; *cp; cp++)    /* collapse doubled quotes to single */
        if ((*cp == '"') && (*(cp+1) == '"'))
            /* source and destination overlap, so strcpy is not allowed */
            memmove(cp, cp + 1, strlen(cp + 1) + 1);
    return ptr;
}


/*
 * classify a "file" list element
 *
 * ptr - points into the element; its first character is always skipped
 *
 * Scans up to the end of the element (',', '/', ')' or end of line).
 * Returns 2 when an '=' is seen on the way, 1 when the element carries
 * no qualifier.
 */
int get_file_type(char *ptr)
{
    int kind = 1;                /* assume no qualifier */
    char ch;

    for (;;) {
        ch = *++ptr;
        if (ch == '=')
            kind = 2;            /* only one qualifier */
        if (ch == '\0' || ch == ',' || ch == '/' || ch == ')')
            break;               /* reached the next clause */
    }
    return kind;
}


/*
 * parse command line for /topic
 *
 * line   - command line text; it is modified in place (see below)
 * topics - array of topic entries to fill
 *
 * Every '/' whose next non-blank character is 't' or 'T' is taken as a
 * /TOPIC switch.  Each such switch claims the next entry of topics[],
 * which is marked used and given the default position (DEFAULT_POS) and
 * the default end text "</" before its value list is parsed.
 *
 * List elements are recognised by one distinguishing character:
 *   l literal   d default   t text     (e)n end    p position
 *   s size      o offset    (e)x exclude   b break
 *   (fi)r first   (fi)l file
 *
 * Notes:
 *   - index is static, so entries accumulate over successive calls.
 *   - No new topic is started once index equals TOPIC_SIZE.
 *   - The 'l' of "file" is overwritten with '1' in the caller's buffer so
 *     it is not confused with "literal".
 *   - NOTE(review): the '/' search does not track quoting, so a "/t"
 *     inside the quoted value of some other switch would also be taken
 *     as /TOPIC -- confirm whether that can occur in practice.
 */
void parse_topic(char *line, topic_str *topics)
{
    char *ptr, *start, **dest;         /* start and dest are not used here */
    static int index = -1;             /* last topics[] entry filled so far */

    ptr = line;                        /* point to start of line */
    for (;;) {                         /* search for /topic until end of line */
        if (index == TOPIC_SIZE)
            return;                    /* exit if we can't hold any more */
        ptr = strchr(ptr, '/');        /* search for switch start */
        if (ptr == NULL)
            return;                    /* no more switches */
        while (isspace(*++ptr));       /* skip spaces */
        if (tolower(*ptr) != 't')      /* topic is unique to one character */
            continue;                  /* not /topic, keep scanning */
        do
            ptr++;
        while (*ptr
               && (*ptr != '=')        /* skip to the keyword/parameter */
               && (*ptr != ':'));      /* separator character */
        if (!*ptr) return;             /* ran out of command line */
        while (isspace(*++ptr));       /* skip spaces */
        if (*ptr == '(')               /* if start of list */
            while (isspace(*++ptr));   /* skip spaces */
        index++;                       /* next topics structure */
        topics[index].used = TRUE;     /* this topic index is used */
        topics[index].pos = DEFAULT_POS; /* default position */
        topics[index].end = "</";      /* default end to HTML end tag */
        for (;;) {                     /* parse all /topic list elements */
            if (tolower(*ptr) == 'e')  /* "end" or "exclude" */
                ptr++;                 /* so we match on 'n' or 'x' */
            if (tolower(*ptr) == 'f') {  /* "first" or "file" */
                ptr++; ptr++;            /* so we match on 'r' or 'l' */
                if (tolower(*ptr) == 'l')
                    *ptr = '1';          /* l = literal, make 1 instead */
            }
            /* dispatch on the distinguishing character of the element */
            switch (tolower(*ptr)) {
                case 'l':                        /* literal */
                    ptr = get_text(&topics[index].literal, ptr);
                    break;
                case 'd':                        /* default */
                    ptr = get_text(&topics[index].deftext, ptr);
                    break;
                case 't':                        /* text */
                    ptr = get_text(&topics[index].text, ptr);
                    lower(topics[index].text);
                    break;
                case 'n':                        /* end */
                    ptr = get_text(&topics[index].end, ptr);
                    lower(topics[index].end);
                    break;
                case 'p':                        /* position */
                    topics[index].pos = get_decimal(ptr);
                    break;
                case 's':                        /* size */
                    topics[index].size = get_decimal(ptr);
                    break;
                case 'o':                        /* offset */
                    topics[index].offset = get_decimal(ptr);
                    break;
                case 'x':                         /* exclude */
                    topics[index].exclude = TRUE; /* has no parameters */
                    break;
                case 'b':                              /* break */
                    topics[index].force_break = TRUE;  /* has no parameters */
                    break;
                case 'r':                        /* first */
                    topics[index].first = TRUE;  /* has no parameters */
                    break;
                case '1':                        /* file */
                    topics[index].file = get_file_type(ptr);  /* set option */
                    break;
            }
            /* unrecognised elements fall through the switch and are
               simply skipped by the scan below */
            while (*ptr
                   && (*ptr != ' ')              /* skip to end of */
                   && (*ptr != ',')              /* keyword */
                   && (*ptr != '/')              /* switch */
                   && (*ptr != ')'))             /* or parameter */
                ptr++;
            while (*ptr &&
                   ((*ptr <= ' ') ||             /* skip spaces, junk */
                    (*ptr == ',')))              /* list separators */
                ptr++;
            if (*ptr == '\0')
                return;                          /* end of the line */
            if ((*ptr == ')') || (*ptr == '/'))
                break;                           /* end of the list */
        }                                        /* scan for more list elements */
    }
}


/*
 * parse command line for /keyword
 *
 * line     - command line text
 * keywords - array of keyword entries to fill
 *
 * Every '/' whose next non-blank character is 'k' or 'K' is taken as a
 * /KEYWORD switch.  Each such switch claims the next entry of keywords[],
 * which is marked used before its value list is parsed.
 *
 * List elements are recognised by one distinguishing character:
 *   t text   (e)n end   (e)x exclude
 *
 * Notes:
 *   - index is static, so entries accumulate over successive calls.
 *   - No new keyword is started once index equals TOPIC_SIZE.
 *   - A /keyword with no '=' or ':' before the end of the line ends the
 *     parse instead of scanning past the end of the string.
 */
void parse_keyword(char *line, topic_str *keywords)
{
    char *ptr;
    static int index = -1;             /* last keywords[] entry filled so far */

    ptr = line;                        /* point to start of line */
    for (;;) {                         /* search for /keyword until end of line */
        if (index == TOPIC_SIZE)
            return;                    /* exit if we can't hold any more */
        ptr = strchr(ptr, '/');        /* search for switch start */
        if (ptr == NULL)
            return;                    /* no more switches */
        while (isspace((unsigned char) *++ptr));  /* skip spaces */
        if (tolower((unsigned char) *ptr) != 'k') /* keyword is unique to one character */
            continue;                  /* not /keyword, keep scanning */
        do
            ptr++;
        while (*ptr
               && (*ptr != '=')        /* skip to the keyword/parameter */
               && (*ptr != ':'));      /* separator character */
        if (!*ptr) return;             /* ran out of command line */
        while (isspace((unsigned char) *++ptr));  /* skip spaces */
        if (*ptr == '(')               /* if start of list */
            while (isspace((unsigned char) *++ptr));  /* skip spaces */
        index++;                       /* next keywords structure */
        keywords[index].used = TRUE;   /* this keyword index is used */
        for (;;) {                     /* parse all /keyword list elements */
            if (tolower((unsigned char) *ptr) == 'e')  /* "end" or "exclude" */
                ptr++;                 /* so we match on 'n' or 'x' */
            switch (tolower((unsigned char) *ptr)) {
                case 't':                        /* text */
                    ptr = get_text(&keywords[index].text, ptr);
                    lower(keywords[index].text);
                    break;
                case 'n':                        /* end */
                    ptr = get_text(&keywords[index].end, ptr);
                    lower(keywords[index].end);
                    break;
                case 'x':                        /* exclude */
                    keywords[index].exclude = TRUE; /* has no parameters */
                    break;
            }
            while (*ptr
                   && (*ptr != ' ')              /* skip to end of */
                   && (*ptr != ',')              /* keyword */
                   && (*ptr != '/')              /* switch */
                   && (*ptr != ')'))             /* or parameter */
                ptr++;
            while (*ptr &&
                   ((*ptr <= ' ') ||             /* skip spaces, junk */
                    (*ptr == ',')))              /* list separators */
                ptr++;
            if (*ptr == '\0')
                return;                          /* end of the line */
            if ((*ptr == ')') || (*ptr == '/'))
                break;                           /* end of the list */
        }                                        /* scan for more list elements */
    }
}
                                                       

/*
 * record the state and value of one command switch
 *
 * name      - switch (or switch.keyword) name as known to the CLI
 * sw        - switch table, terminated by an entry whose name is NULL
 * multi     - nonzero when the switch takes a list of values; each call
 *             then fills a fresh entry
 * def_sw    - default state of the switch (nonzero = on)
 * def_value - default value text, or NULL when there is none
 *
 * The entry for `name` is located (or registered at the end of the table
 * on first use), its state is combined with cli$present, and the next
 * value is fetched with cli$get_value.  Surrounding quotes are removed
 * from a quoted value and doubled quotes are collapsed to one.
 *
 * Returns the status of cli$get_value, so a caller can loop while the low
 * bit is set to collect every value of a list.
 */
int parse_switch(char *name, switch_str sw[], int multi, int def_sw,
                  char *def_value)
{
    int status, ind;
    short leng;
    char *ptr;
    static char sw_value[100];
    $DESCRIPTOR(switch_dsc, sw_value);

    for (ind = 0; sw[ind].name; ind++)  /* find end of sw[] */
        if ((!multi || (sw[ind].value == NULL)) &&  /* empty multi entry */
            (strcmp(sw[ind].name, name) == 0))  /* or a pre-existing switch */
            break;

    if (sw[ind].name == NULL) {         /* register name first time */
        sw[ind].name = calloc(strlen(name) + 1, sizeof(char));
        check_alloc(sw[ind].name);
        strcpy(sw[ind].name, name);
        sw[ind].state = def_sw;         /* and set its default state */
        sw[ind].multiple = multi;
    }

    if (def_sw)    /* if default on, remember if it's turned off */
        sw[ind].state &= (cli$present(descr(name)) & 1);
    else           /* if default off, remember if it's turned on */
        sw[ind].state |= (cli$present(descr(name)) & 1);

    status = cli$get_value(descr(name), &switch_dsc, &leng);
    if (status & 1) {
        sw_value[leng] = '\0';
        /* if this is the first value or a non-default value, save it;
           with no default at all, every value counts as non-default */
        if ((sw[ind].value == NULL) || (def_value == NULL) ||
            strcmp(def_value, sw_value)) {
            /* remove quotes from quoted string; a lone quote character
               has no closing quote to strip */
            if ((*sw_value == '"') && (leng >= 2)) {
              /* source and destination overlap, so use memmove */
              memmove(sw_value, sw_value + 1, leng - 2);
              sw_value[leng - 2] = '\0';
            }
            for (ptr = sw_value; *ptr; ptr++)
                if ((*ptr == '"') && (*(ptr+1) == '"'))  /* doubled quotes? */
                    /* yes, make single quote (overlapping move) */
                    memmove(ptr, ptr + 1, strlen(ptr + 1) + 1);
            sw[ind].value = calloc(strlen(sw_value) + 1, sizeof(char));
            check_alloc(sw[ind].value);
            strcpy(sw[ind].value, sw_value);
        }
    }
    else    /* hack around CLI bug that doesn't do selector.end right */
        if (def_value) {  /* no value, but default exists */
            sw[ind].value = calloc(strlen(def_value) + 1, sizeof(char));
            check_alloc(sw[ind].value);
            strcpy(sw[ind].value, def_value);
            sw[ind].state = def_sw;
        }
    return status;
}


/*
 * parse a command line and load the switch, keyword and topic tables
 *
 * input - descriptor of the command line text
 * sw    - switch table filled through parse_switch()
 *
 * The command is handed to cli$dcl_parse, then every known switch is
 * looked up in the fixed order of the table below.  List-valued switches
 * are read repeatedly until parse_switch() reports no further value.
 * Afterwards the /keyword list (when present) and the /topic list are
 * parsed straight from the command text.
 */
void parse_commands(dsc *input, switch_str sw[])
{
    /* name, list-valued?, default state, default value */
    static struct {
        char *name;
        int multi;
        int def_sw;
        char *def_value;
    } known[] = {
        {"add", 0, 0, NULL},
        {"candidates", 0, 0, NULL},
        {"character", 0, 0, NULL},
        {"count", 0, 0, NULL},
        {"dash", 0, 0, NULL},
        {"default_topic", 0, 1, NULL},
        {"equal", 0, 0, NULL},
        {"exclude", 1, 0, NULL},
        {"ff", 0, 0, NULL},
        {"field", 0, 0, NULL},
        {"field.position", 0, 0, NULL},
        {"field.size", 0, 0, NULL},
        {"file", 1, 0, NULL},               /* read file list */
        {"helpfile.selector", 0, 0, NULL},
        {"helpfile.title", 0, 0, NULL},
        {"keyword", 0, 0, NULL},
        {"line", 0, 0, NULL},
        {"link", 0, 0, NULL},
        {"link.sort", 0, 0, NULL},
        {"max_topics", 0, 1, "6"},
        {"minimum_word", 0, 1, "3"},
        {"noise", 0, 0, NULL},
        {"numbers", 0, 1, NULL},
        {"output", 0, 0, NULL},
        {"paragraph", 0, 0, NULL},
        {"position", 0, 0, NULL},
        {"punctuation", 0, 0, NULL},
        {"quiet", 0, 0, NULL},
        {"selector", 0, 0, NULL},
        {"selector.text", 0, 0, NULL},
        {"selector.end", 0, 1, " -->"},
        {"selector.default", 0, 0, NULL},
        {"selector.both", 0, 0, NULL},
        {"selector.ignore", 0, 0, NULL},
        {"sequential", 0, 0, NULL},
        {"url", 0, 0, NULL},
        {"url.text", 0, 0, NULL},
        {"url.end", 0, 1, "\">"},
        {"url.default", 0, 0, NULL},
        {"url.both", 0, 0, NULL},
        {"url.prefix", 0, 0, NULL},
        {"url.fragment", 0, 0, NULL},
        {"version", 0, 0, NULL},
        {"whole", 0, 0, NULL},
        {"word_length", 0, 1, "20"}
    };
    int entry;
    int total = sizeof(known) / sizeof(known[0]);

    cli$dcl_parse(input, index_commands);

    for (entry = 0; entry < total; entry++) {
        if (known[entry].multi) {
            /* list-valued switch: keep reading until no value is left */
            while (parse_switch(known[entry].name, sw, known[entry].multi,
                                known[entry].def_sw,
                                known[entry].def_value) & 1)
                ;
        }
        else {
            parse_switch(known[entry].name, sw, known[entry].multi,
                         known[entry].def_sw, known[entry].def_value);
        }
    }

    if (switch_present("keyword")) {
        keyword_index = -1;
        parse_keyword(input->dsc$a_pointer, keywords); /* fill keywords[] */
    }

    /* parse the command line and fill topics */
    parse_topic(input->dsc$a_pointer, topics);
}


/*
 * report the state of a switch
 *
 * name - switch name as registered in the global switches[] table
 *
 * Returns the state of the first entry whose name matches, or FALSE when
 * the name is not in the table.
 */
int switch_present(char *name)
{
    int ind = 0;

    while (switches[ind].name != NULL) {
        if (strcmp(name, switches[ind].name) == 0)
            return switches[ind].state;
        ind++;
    }
    return FALSE;
}
     

/*
 * fetch the (next) value of a switch
 *
 * name - switch name as registered in the global switches[] table
 *
 * Returns the value of the first entry with that name whose `multiple`
 * field is below 2, or NULL when no such entry exists.  An entry of a
 * list-valued switch (multiple == 1) is marked consumed (multiple = 2) as
 * it is returned, so successive calls walk through the list;
 * make_valid() rewinds the walk.
 */
char *switch_value(char *name)
{
    int ind;

    for (ind = 0; switches[ind].name; ind++)
        if ((strcmp(name, switches[ind].name) == 0) &&
            (switches[ind].multiple < 2))
            break;
    if (switches[ind].name == NULL)
        return NULL;                 /* not found: don't read the terminator */
    if (switches[ind].multiple == 1)
        switches[ind].multiple = 2;  /* invalidate current entry */
    return switches[ind].value;
}


/*
 * rewind the value list of a switch
 *
 * name - switch name as registered in the global switches[] table
 *
 * Every entry with that name that switch_value() marked consumed
 * (multiple == 2) is reset to multiple == 1 so it can be returned again.
 */
void make_valid(char *name)
{
    int ind = 0;

    while (switches[ind].name != NULL) {
        if ((switches[ind].multiple == 2) &&
            (strcmp(name, switches[ind].name) == 0))
            switches[ind].multiple = 1;
        ind++;
    }
}


/*
 * allocate or resize a block of memory
 *
 * mem  - existing block, or a null pointer to request a fresh one
 * size - wanted size in bytes
 *
 * A null `mem` is served by malloc, anything else by realloc.  The result
 * is passed through check_alloc() before it is returned.
 */
void *my_realloc(void *mem, int size)
{
    void *ptr;

    if (mem != (void *) 0)
        ptr = (void *) realloc(mem, size);
    else
        ptr = (void *) malloc(size);
    check_alloc(ptr);
    return ptr;
}

/*
 * sanity check memory allocation calls
 *
 * ptr - result of an allocation call
 *
 * Does nothing for a non-null pointer; for a null pointer lib$stop is
 * called with the SS$_INSFMEM status.
 */
void check_alloc(void *ptr)
{
    if (ptr != (void *) 0)
        return;
    lib$stop(SS$_INSFMEM);
}