From: Didier Morandi [Didier.Morandi@gmx.ch]
Sent: Tuesday, January 22, 2002 2:46 PM
To: Info-VAX@Mvb.Saic.Com
Subject: DCL minute of the GMT+1 nite: Astalavista v1.2

Searching the two FREEWARE CDs (which are actually three) for an occurrence
gives too many occurrences if you use one word without separators, for example
DCL. Of course, you can do a search on " DCL " but you will miss items
separated with commas, one space, a semi-colon, a single quote, two double
quotes, etc.

Here is ASTALAVISTA.COM version 1.2-1 which allows this item separation search.
To give better results, it now adds in the thesaurus file each word enclosed
with a space.

The interest compared to a single search on the whole disk is speed. The "B"
(build) function builds a thesaurus for a whole disk with, for all words found,
the full specification of the files where they were found. The "S" (search) is
done once from the thesaurus, and then from the target file(s), which obviously
goes much faster than scanning the whole files each time you search for an
occurrence.

Enjoy.

$!+
$! ASTALAVISTA.COM
$! How to build a single word index with DCL and find line numbered occurrences.
$!
$! Parameters:
$!   P1  "S" (search) or "B" (build). Only the first character is used,
$!       case-insensitively. Prompted for when omitted.
$!   P2  Build : file specification to index, e.g. disk:[dir]*.ext
$!               ([*...]*.* allowed). A file type is mandatory.
$!       Search: the item to look up. Prompted for when omitted.
$!   P3  Search only: when non-blank, the target files are searched with
$!       /LOG/NOOUTPUT instead of /NUMBERS.
$!
$! Files:
$!   SYS$LOGIN:THESAURUS_type.DAT   one " word filespec" record per word
$!                                  (THESAURUS_.DAT when the type is "*").
$!   DIR.TEMP, TEMP$TEMP.NOT_SORTED, TEMP$TEMP.SORTED, SEARCH.TEMP
$!                                  work files in the current default directory.
$!
$! V1.0-0 14-mar-2001 D. Morandi (Didier.Morandi@gmx.ch)
$! V1.1-0 23-may-2001 DMo add multi-extension processing (*.*)
$! V1.2-0 09-nov-2001 DMo add HTML skip table.
$! V1.2-1 22-jan-2002 DMo fix typo when previous thesaurus was found
$!
$! Review changes (relative to V1.2-1 as archived):
$!   - OPEN and READ now carry /ERROR branches, so a file that cannot be
$!     opened is skipped instead of making the read loop spin forever.
$!   - Single-word lines go through the same path as multi-word lines, so
$!     their records get the leading blank the search phase relies on and
$!     they are counted.
$!   - A token that is empty once quotes and commas are stripped no longer
$!     ends the processing of the rest of the line.
$!   - The search loop is no longer nested inside an IF-THEN block and
$!     reports when the thesaurus has no entry for the item.
$!-
$ set noon
$ say = "write sys$output"
$!
$! Ask for the function until the answer starts with S or B.
$!
$START:
$ if p1 .eqs. "" then inq p1 "Search or Build (S/B)"
$ if p1 .eqs. "" then exit
$ p1 = f$extract(0,1,f$edit(p1,"upcase"))
$ if p1 .nes. "S" .and. p1 .nes. "B"
$ then
$   p1 = ""
$   goto START
$ endif
$ if p1 .eqs. "S" then goto DO_SEARCH
$!------------------------------------------------------------------------------
$! BUILD: ask for the file specification until it has a type and matches
$! at least one file.
$!
$! NOTE(review): the prompt text below probably lost a key name written in
$! angle brackets in front of "=end" when the post was archived. Confirm
$! against an original copy.
$!------------------------------------------------------------------------------
$EXT_PLEASE:
$ if p2 .eqs. "" then -
        inq p2 "Disk:[dir]*.ext or [] ([*...]*.* allowed) =end"
$ if p2 .eqs. "" then exit
$ doc_type = f$parse(p2,,,"type")
$ if doc_type .eqs. "."
$ then
$   say ""
$   say "Please supply an extension for your files"
$   say ""
$   p2 = ""
$   goto EXT_PLEASE
$ endif
$ if f$search(p2) .eqs. ""
$ then
$   say ""
$   say "I'm afraid there are no ",p2," files here, Sir."
$   say ""
$   p2 = ""
$   goto EXT_PLEASE
$ endif
$ p2 = p2 - ";"
$ doc_type = f$edit(doc_type,"upcase") - "."
$ if f$search("sys$login:thesaurus_''doc_type'.dat") .nes. "" .and. -
        doc_type .nes. "*"
$ then
$   say ""
$   say "A previous THESAURUS_''doc_type'.DAT file exists in SYS$LOGIN:"
$   inq ok "Do you want to build a new one [Y/N]?"
$   if .not. ok then exit
$ endif
$!
$! Skip tables. A word is dropped when it occurs ANYWHERE in the table
$! (substring match through F$LOCATE), so fragments of the listed keywords
$! are dropped as well. An empty table skips nothing.
$!
$ skip_table = ""
$ if doc_type .eqs. "COM"
$ then
$   skip_table = ".and..eq..eqs..gt..ge..ges..gts..le..les..lt..ne..nes..or."
$   skip_table = skip_table + """#&'()-|_@[]$*,;:! "
$   skip_table = skip_table + "ifthenelseendifgosubgotoreturnexit$deck$eod"
$   skip_table = skip_table + "openclosereadwriteappendend"
$ endif
$ if doc_type .eqs. "HTM" .or. doc_type .eqs. "HTML"
$ then
$!  NOTE(review): the original tag list was lost (the markup was stripped
$!  from the archived post, leaving an unterminated string). The list below
$!  is a placeholder reconstruction. Replace it with the intended tags.
$   skip_table = "<html><head><title></title></head><body></body></html>"
$   skip_table = skip_table + -
        "<p></p><br><hr><b></b><i></i><a></a><ul></ul><ol></ol><li></li>"
$! (to be completed...)
$ endif
$ if f$search("dir.temp") .nes. "" then dele_ dir.temp;*
$ start_time = f$time()
$ say ""
$ say "Time is ",start_time
$ say "working..."
$ say ""
$! Give up at once if the file list cannot be produced.
$ on warning then exit
$ say "[building documents list]"
$ dire_/col=1/notrail/noheader/out=dir.temp/exclude=[vms$common...] 'p2';
$ set noon
$! Channels may have been left open by a previous, interrupted run.
$ close/nolog ch
$ close/nolog ch2
$ close/nolog ch3
$ nbf = 0
$ nbw = 0
$ open/read ch dir.temp
$ if f$search("temp$temp.not_sorted") .nes. "" then -
        dele_ temp$temp.not_sorted;*
$ say "[processing documents]"
$ say ""
$ open/write ch3 temp$temp.not_sorted
$!
$! Outer loop: one pass per file listed in DIR.TEMP.
$!
$LOOP:
$ read/end=EOF/error=EOF ch file
$ file = f$edit(file,"trim,lowercase")
$ if file .eqs. "" then goto LOOP
$ say "processing ",file
$! Without the /ERROR branch a failed OPEN would be followed by READs that
$! fail on every pass (SET NOON is active) and the loop would never end.
$ open/read/error=SKIP_FILE ch2 'file'
$ nbf = nbf + 1
$!
$! Middle loop: one pass per record of the current file.
$!
$LOOP2:
$ read/end=EOF2/error=EOF2 ch2 line
$ line = f$edit(line,"trim,compress,lowercase,uncomment")
$ if line .eqs. "" .or. line .eqs. "$" .or. line .eqs. "$!" then goto LOOP2
$ i = 0
$!
$! Inner loop: one pass per blank-separated token of the record.
$! F$ELEMENT returns the delimiter itself once the element number is out of
$! range. That is the only end-of-line test; an empty token just moves on
$! to the next one.
$!
$LOOP3:
$ raw = f$element(i," ",line)
$ if raw .eqs. " " then goto LOOP2
$ i = i + 1
$! Strip up to six apostrophes, two commas and four double quotes.
$! NOTE(review): the archived chain also subtracted an empty string, which
$! is a no-op. It may originally have removed the two angle brackets -
$! confirm against an original copy before adding that back.
$ word = raw - "'" - "'" - "'" - "," - """" - """"
$ word = word - "'" - "'" - "'" - "," - """" - """"
$! Edit again: text that was inside quotes escaped the first F$EDIT.
$ word = f$edit(word,"trim,compress,uncomment,lowercase")
$ if word .eqs. "" then goto LOOP3
$ if f$locate(word,skip_table) .ne. f$len(skip_table) then goto LOOP3
$ if doc_type .eqs. "COM"
$ then
$   if f$extract(f$len(word)-1,1,word) .eqs. ":" then goto LOOP3 !skip labels
$   if f$extract(0,2,word) .eqs. "$!" then goto LOOP3 !skip comments
$ endif
$! Record layout " word filespec": the search phase looks for " word " and
$! takes element 2 (blank-delimited) as the file specification.
$ write ch3 " ",word," ",file
$ nbw = nbw + 1
$ goto LOOP3
$EOF2:
$ close ch2
$ goto LOOP
$SKIP_FILE:
$ say "  (cannot open ",file," - skipped)"
$ goto LOOP
$EOF:
$ close ch
$ close ch3
$!+
$! NB If you build the system disk thesaurus, log in under the system account
$! or you will run out of virtual memory *now* !
$!-
$ say "[end of processing]"
$ say "[sorting resulting file]"
$ if doc_type .eqs. "*" then doc_type = "" !delete symbol value
$ sort/key=(pos:1,size:80) temp$temp.not_sorted temp$temp.sorted
$ say "[removing duplicates]"
$ merge/key=(pos:1,size:80)/nodup temp$temp.sorted -
        sys$login:thesaurus_'doc_type'.dat
$ dele_ temp$temp.not_sorted;*, temp$temp.sorted;*, dir.temp;*
$ say "[done. Thesaurus file is SYS$LOGIN:THESAURUS_''DOC_TYPE'.DAT]"
$ say ""
$ say "starting time was ",start_time
$ say "ending time is ",f$time()
$ say nbw," words stored from ",nbf," files in ",p2
$ say ""
$ exit
$!------------------------------------------------------------------------------
$! SEARCH: look the item up in every thesaurus file of SYS$LOGIN, then
$! search each file named by a matching record for the item surrounded by
$! blanks, double quotes or commas.
$!------------------------------------------------------------------------------
$DO_SEARCH:
$ if p2 .eqs. "" then inq p2 -
        "Item to search (enclose more than one with double quotes)"
$ if p2 .eqs. "" then exit
$ if f$search("search.temp") .nes. "" then dele_ search.temp;*
$ search = "search"
$ search/output=search.temp sys$login:thesaurus_*.dat " ''p2' "
$ if $severity .nes. "1"
$ then
$   say ""
$   say "No thesaurus entry found for ",p2
$   goto SEARCH_DONE
$ endif
$ close/nolog ch
$ open/read/error=SEARCH_DONE ch search.temp
$LOOP_S:
$ read/end=EOF_S/error=EOF_S ch line
$! Element 2 is the file specification of a " word filespec" record. Any
$! other line of the SEARCH output has no such element and is ignored.
$ file = f$element(2," ",line)
$ if file .eqs. " " .or. file .eqs. "" then goto LOOP_S
$ say ""
$ say f$edit(file,"upcase")
$ if p3 .eqs. ""
$ then
$   search/numbers 'file' " ''p2' ","""''p2'""","""''p2' "," ''p2'""", -
        ",''p2' "," ''p2',",",''p2',"
$ else
$   search/log/noout 'file' " ''p2' ","""''p2'""","""''p2' "," ''p2'""", -
        ",''p2' "," ''p2',",",''p2',"
$ endif
$ goto LOOP_S
$EOF_S:
$ close ch
$SEARCH_DONE:
$ if f$search("search.temp") .nes. "" then dele_ search.temp;*
$ exit

D.
--
----------------------------------------------------------------------
MORANDI Consulting.
WEB: http://Didier.Morandi.Free.fr/index_us.html Pflanzschulstrasse 53, 8004 Zurich, Switzerland. GSM: +41 79 705 4670 19, chemin de la Butte, 31400 Toulouse, France. Disaster Recovery Plans, Computer Security Audits, DEC OpenVMS Expertise On parle français Man spricht Deutsch se habla Castellano English spoken