Everhart, Glenn (FUSA) From: Jerry Leichter [leichter@lrw.com] Sent: Thursday, April 29, 1999 11:07 PM To: Info-VAX@Mvb.Saic.Com Subject: RE: OpenVMS Equivalent to the UNIX split command Does anyone know if there is an OpenVMS equivalent to the UNIX split command - on freeware/shareware anywhere? Different Unix split commands do different things. The old DECUS C library contained a splita program that was originally designed to split DECUS C standard archives. I later modified it to split arbitrary text files; this form might prove useful to you. Here it is: -------------------------Cut here------------------------- /* * Split an archive or text file into managable pieces. */ /*)BUILD $(TKBOPTIONS) = { TASK = ...SPA } */ #ifdef DOCUMENTATION title splita Split an Archive or Text File into Smaller Pieces index Split an archive or text file into smaller pieces synopsis splita [-byte_limit] [-f[m]] input_file output_model description Splits a file into smaller files (output_model.001, etc.). By default, the file is a DECUS C archive, in which case splits will occur at the end of archive elements, and an identifying header, which describes the contents of the file and which archx will interpret as calling for the creation of an empty file on the null device, will be inserted at the beginning of each file produced. If -f is specified, the file is treated as a simple text file. Splits may then occur after any newline. If the option is given as -fm, a marker line is inserted at the beginning and end of each created file. This is useful if the parts of the file are sent through mailers that may append garbage like extra blank lines. A marker line is exactly the following: --------------------[Cut here]-------------------- There are 20 dashes before and after the "[Cut here]". In addition, splita will add an identifying header before the leading marker line. The format of the header is: Part nnn, file ffff where "nnn" is three digits, with possible leading 0's. The byte_limit (default 50000) may be set by an option. If no output_model is present, the input_file is used instead. In all cases, everything after the first "." in the model used is discarded. diagnostics File error messages only. author Martin Minow Jerry Leichter bugs Can only produce up to 999 parts. Perhaps markers would be useful in archive mode, too? #endif /* * Edit history * 0.0 ??-???-?? MM Invention * 1.0 2-Oct-86 JSL Added -f[m]. Options can now occur anywhere, not * just as the first command-line argument. Bugs * fixed: byte_limit wasn't being initialized to * anything (effectively, 0). The header lines inserted * in split archives used the current output file name, * rather than the input file name. Those lines now * refer to _nl:, rather than nl: - just in case someone * is silly enough to assign something as the null * device! * 1.1 22-Oct-86 JSL Bugfix: Get the bytelimit value from the argument * currently being looked at, not always from argument 1! */ #include #include #ifdef vms #include #include #define IO_SUCCESS (SS$_NORMAL | STS$M_INHIB_MSG) #define IO_ERROR SS$_ABORT #endif /* * Note: IO_SUCCESS and IO_ERROR are defined in the Decus C stdio.h file */ #ifndef IO_SUCCESS #define IO_SUCCESS 0 #endif #ifndef IO_ERROR #define IO_ERROR 1 #endif #define EOS 0 #define FALSE 0 #define TRUE 1 char line[513] = "\n"; char mark[] = "--------------------[Cut here]--------------------"; char *inputname = NULL; char outname[513]; char outfilename[513]; FILE *outfd; int filecount; int linecount; int domark = FALSE; /* Insert marker */ int archive = TRUE; /* Handle input as archive */ long bytecount; long bytelimit = 50000; extern char *strchr(); extern long atol(); main(argc, argv) int argc; /* Arg count */ char *argv[]; /* Arg vector */ { register char *tp; register int i; register int oargc; oargc = argc; for (i = 1; i < oargc; i++) { tp = argv[i]; if (tp[0] == '-') { argc--; if (isdigit(tp[1])) { bytelimit = atol(&tp[1]); } else if (tp[1] == 'f') { archive = FALSE; if (tp[2] != 'm' && tp[2] != EOS) usage(); domark = (tp[2] == 'm'); } else usage(); } else if (inputname == NULL) { inputname = tp; strcpy(outname, inputname); } else strcpy(outname, tp); } if (argc <= 1 || argc > 3) usage(); if (freopen(inputname, "r", stdin) == NULL) { perror(inputname); exit(IO_ERROR); } /* * Mash the '.' in the output model */ if ((tp = strchr(outname, '.')) == NULL) tp = outname + strlen(outname); *tp = EOS; outfd = NULL; filecount = 0; linecount = 0; openanother(); process(); closeout(); exit(IO_SUCCESS); } process() /* * Process archive open on stdin */ { while (fgets(line, sizeof line, stdin) != NULL) { if (outfd == NULL || (bytecount >= bytelimit && (!archive || ( line[0] == '-' && line[1] == 'h' && line[2] == '-' ) ) ) ) { closeout(); openanother(); } linecount++; bytecount += strlen(line); fputs(line, outfd); } } openanother() { filecount++; sprintf(outfilename, "%s.%03d", outname, filecount); #ifdef vms { int channel; extern FILE *fdopen(); if ((channel = creat(outfilename, 0, "rat=cr", "rfm=var")) == -1) outfd = NULL; else outfd = fdopen(channel, "w"); } #else outfd = fopen(outfilename, "w"); #endif if (outfd == NULL) { perror(outfilename); exit(IO_ERROR); } printf("%s starts at %s", outfilename, line); if (strchr(line, '\n') == NULL) putchar('\n'); bytecount = 0; linecount = 0; if (archive) fprintf(outfd, "-h- _nl: (%s, part %d)\n", inputname, filecount); if (domark) { fprintf(outfd, "Part %03d, file %s\n", filecount, inputname); fprintf(outfd, "%s\n", mark); } } closeout() { if (outfd != NULL) { if (domark) fprintf(outfd, "%s\n", mark); fclose(outfd); printf("%s finished, %d records, %ld bytes\n", outfilename, linecount, bytecount); } } usage() { fprintf(stderr, "Usage: splita [-byte_limit] [-f[m]] input_file [output_model]\n"); exit(IO_ERROR); } -------------------------Cut here------------------------- As a special bonus, here's a program that performs the inverse function: It combines back files that were split using splita with the -fm option: -------------------------Cut here------------------------- /* * Unsplit a text file created with splita -fm */ /*)BUILD $(TKBOPTIONS) = { TASK = ...SPA } */ #ifdef DOCUMENTATION #endif /* * Edit history * 0.0 23-Nov-87 JSL Invention. */ #include #include #ifdef vms #include #include #define IO_SUCCESS (SS$_NORMAL | STS$M_INHIB_MSG) #define IO_ERROR SS$_ABORT #endif /* * Note: IO_SUCCESS and IO_ERROR are defined in the Decus C stdio.h file */ #ifndef IO_SUCCESS #define IO_SUCCESS 0 #endif #ifndef IO_ERROR #define IO_ERROR 1 #endif #define EOS 0 #define FALSE 0 #define TRUE 1 #define streq(s,t) (strcmp(s,t) == 0) char line[513]; char mark[] = "--------------------[Cut here]--------------------\n"; char setname[513]; int setnameset = FALSE; char inname[513]; char infilename[513]; FILE *infd; int filecount; extern char *strchr(); extern long atol(); main(argc, argv) int argc; /* Arg count */ char *argv[]; /* Arg vector */ { register char *tp; register int i; register int oargc; #ifdef vms argc = getredirection(argc, argv); #endif oargc = argc; for (i = 1; i < oargc; i++) { tp = argv[i]; strcpy(inname, tp); } /* * Mash the '.' in the input model */ if ((tp = strchr(inname, '.')) == NULL) tp = inname + strlen(inname); *tp = EOS; infd = NULL; filecount = 0; openanother(); process(); exit(IO_SUCCESS); } process() /* * Process file open on infd */ { while (fgets(line,sizeof(line),infd) != NULL) { if (streq(line,mark)) { if (openanother()) continue; else return; } fputs(line,stdout); } fprintf(stderr,"- Trailing mark not found\n",infilename); exit(IO_ERROR); } openanother() { char xline[sizeof(line)]; char *this; char *prev; char *temp; int thisnumb; this = line; prev = xline; prev[0] = EOS; filecount++; sprintf(infilename,"%s.%03d",inname,filecount); infd = fopen(infilename,"r"); if (infd == NULL) { fprintf(stderr,"\n"); return(FALSE); } fprintf(stderr,"\nProcessing %s",infilename); while (fgets(this,sizeof(line),infd) != NULL) { if (streq(this,mark)) goto found; temp = this; this = prev; prev = temp; } fprintf(stderr," - No leading marker found\n",infilename); exit(IO_ERROR); found: if ((sscanf(prev,"Part %03d, file %s\n",&thisnumb,&xline) != 2) || thisnumb != filecount || (setnameset && !streq(setname,xline))) { fprintf(stderr," - not part of set\n"); exit(IO_ERROR); } if (!setnameset) { strcpy(setname,xline); setnameset = TRUE; } return(TRUE); } /* * getredirection() is intended to aid in porting C programs * to VMS (Vax-11 C) which does not support '>' and '<' * I/O redirection. With suitable modification, it may * useful for other portability problems as well. * * Modified, 24-Jan-86 by Jerry Leichter * When creating a new output file, force the maximum record size to * 512; otherwise, it ends up as 0 (though the C I/O system won't write * a record longer than 512 bytes anyway) which will cause problems if * the file is later opened for APPEND - if the maximum record size is * 0, C will use the length of the longest record written to the file * for its buffer! */ #ifdef vms #include #include int getredirection(argc, argv) int argc; char **argv; /* * Process vms redirection arg's. Exit if any error is seen. * If getredirection() processes an argument, it is erased * from the vector. getredirection() returns a new argc value. * * Warning: do not try to simplify the code for vms. The code * presupposes that getredirection() is called before any data is * read from stdin or written to stdout. * * Normal usage is as follows: * * main(argc, argv) * int argc; * char *argv[]; * { * argc = getredirection(argc, argv); * } */ { register char *ap; /* Argument pointer */ int i; /* argv[] index */ int j; /* Output index */ int file; /* File_descriptor */ for (j = i = 1; i < argc; i++) { /* Do all arguments */ switch (*(ap = argv[i])) { case '<': /* ': /* >file or >>file */ if (*++ap == '>') { /* >>file */ /* * If the file exists, and is writable by us, * call freopen to append to the file (using the * file's current attributes). Otherwise, create * a new file with "vanilla" attributes as if * the argument was given as ">filename". * access(name, 2) is TRUE if we can write on * the specified file. */ if (access(++ap, 2) == 0) { if (freopen(ap, "a", stdout) != NULL) break; /* Exit case statement */ perror(ap); /* Error, can't append */ exit(errno); /* After access test */ } /* If file accessable */ } /* * On vms, we want to create the file using "standard" * record attributes. create(...) creates the file * using the caller's default protection mask and * "variable length, implied carriage return" * attributes. dup2() associates the file with stdout. */ if ((file = creat(ap, 0, "rat=cr", "rfm=var", "mrs=512")) == -1 || dup2(file, fileno(stdout)) == -1) { perror(ap); /* Can't create file */ exit(errno); /* is a fatal error */ } /* If '>' creation */ break; /* Exit case test */ default: argv[j++] = ap; /* Not a redirector */ break; /* Exit case test */ } } /* For all arguments */ argv[j] = NULL; /* Terminate argv[] */ return (j); /* Return new argc */ } #else getredirection(argc, argv) int argc; char *argv[]; /* * Dummy routine. */ { return (argv[0], argc); } #endif -------------------------Cut here------------------------- -- Jerry