/*****************************************************************************/ /* [WASD]Extract.c CGI-compliant script (working in concert with QUERY.C) to: 1. extract a specified range of lines from a plain text file 2. anchor hits of the keyword in an HTML file Extended file specifications may be expressed using either RMS-escaped ("^_") or URL-escaped ("%nn") forbidden characters. If a version delimiter (';', with or without version number) is present in the path specification then this script displays and anchors RMS-escaped and VMS syntax file names. If none is present it supplies URL-encoded file names. When extracting HTML files it returns the entire document but with each occurrence of the hit enclosed by a '' anchor that allows the specific hit to be jumped to with relative document syntax. If an HTML file is "extracted" without either CGI variable 'form_anchor' or 'form_plain' being non-empty a 302 redirection is generated direct to the document itself (on the assumption that it is a self-relative link within an "extract"-anchored document, with the consequent loss of any partial-document reference (i.e. #blah)). The following tags do not have any content included: , , , , . "Text" file extensions are predefined in the DEFAULT_TEXT_TYPES and DEFAULT_HTML_TYPES macros. To specify a custom list use /TEXT= and /HTML= or to add other extensions to be considered text or HTML use /ADDTEXT= and /ADDHTML= (note this is a comma-separated list with no extension period). File extensions may contain the asterisk wildcard character, representing zero or more matching characters (e.g. "REPORT_*"). 
CGI FORM ELEMENTS ----------------- form_anchor= introduce "hit" anchors into an HTML file form_case= was a case sensitive search (Y or N) form_end= record (line) number to end extract form_exact= exact number of records (Y or N) form_extract= number of lines to pass to extract utility form_highlight= the string to be highlighted form_html= comma-separated list of HTML file extensions (overrides the /HTML and /ADDHTML qualifiers) form_plain= treat an HTML file as plain text form_requery= URI used for the query and for the requery link form_start= record (line) number to begin extract form_text= comma-separated list of text file extensions (overrides the /TEXT and /ADDTEXT qualifiers) Generally these form elements will be generated by QUERY.C but there is no reason why they shouldn't come from somewhere else. LOGICAL NAMES ------------- EXTRACT$DBUG turns on all "if (Debug)" statements EXTRACT$PARAM equivalent to (overrides) the command line parameters/qualifiers (define as a system-wide logical) QUALIFIERS ---------- /ADDHTML= additional list of comma separated HTML file types /ADDTEXT= additional list of comma separated TEXT file types /CHARSET= "Content-Type: text/html; charset=...", empty suppresses charset /DBUG turns on all "if (Debug)" statements /[NO]ODS5 control extended file specification (basically for testing) /STYLE= URL for site CSS style sheet /TEXT= complete list of comma separated TEXT file types OSU ENVIRONMENT --------------- Script responses are returned in OSU "raw" mode; the script taking care of the full response header and correctly carriage-controlled data stream, text or binary!! Uses the CGILIB.C to engage in the dialog phase generating, storing and then making available the equivalent of CGI variables. "VANILLA" CGI ENVIRONMENT ------------------------- Primarily for the likes of Netscape FastTrack. 
This environment can accommodate CGI variables that are not prefixed with "WWW_" and do not supply "KEY_xxxxx" or "FORM_xxxxx" (which must be derived from "QUERY_STRING"). Full HTTP stream (non-parsed header) is assumed as not supported so all output occurs with a CGI-compliant header line (e.g. "Status: 200 Success") and record-oriented output. BUILD DETAILS ------------- See BUILD_EXTRACT.COM procedure. COPYRIGHT --------- Copyright (C) 1996-2021 Mark G.Daniel Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. VERSION HISTORY (update SOFTWAREVN as well!) 
--------------- 28-JUL-2020 MGD v4.2.0, highlighted hits now using "athitof" instead of "__hit__" bugfix; style tag processing 12-MAR-2020 MGD v4.1.0, ensure CGI response if no extract 04-OCT-2014 MGD v4.0.0, a nod to the twenty-first century 13-NOV-2011 MGD v3.3.3, bugfix; tag counting 10-MAY-2005 MGD v3.3.2, SWS 2.0 ignore query string components supplied as command-line parameters differently to CSWS 1.2/3 23-DEC-2003 MGD v3.3.1, minor conditional mods to support IA64 23-JUN-2003 MGD v3.3.0, record size increased to maximum (32767 bytes) 12-APR-2003 MGD v3.2.4, link colour changed to 0000cc 15-AUG-2002 MGD v3.2.3, GetParameters() mod for direct CSWS 1.2 support 01-JUL-2001 MGD v3.2.2, add 'SkipParameters' for direct OSU support 25-JAN-2001 MGD v3.2.1, use to terminate processing 28-OCT-2000 MGD v3.2.0, use CGILIB object module 02-MAR-2000 MGD v3.1.2, bugfix;ed again:^( rework SameFileType() 28-FEB-2000 MGD v3.1.1, bugfix; SameFileType() wildcard processing 15-FEB-2000 MGD v3.1.0, allow wildcarded file types 18-JAN-2000 MGD v3.0.0, support extended file specifications (ODS-5) 07-AUG-1999 MGD v2.7.0, use more of the CGILIB functionality 24-APR-1999 MGD v2.6.0, use CGILIB.C, standard CGI environment (Netscape FastTrack) 20-NOV-1998 MGD v2.5.1, exclude certain content (e.g. 
", 9)) { InsideScript = false; memset (tptr, MASK_TAG_CHAR, 9); tptr += 9; rptr += 9; continue; } } if (InsideServer) { if (strsame (rptr, "", 9)) { InsideServer = false; memset (tptr, MASK_TAG_CHAR, 9); tptr += 9; rptr += 9; continue; } } if (InsideStyle) { if (strsame (rptr, "", 8)) { InsideStyle = false; memset (tptr, MASK_TAG_CHAR, 8); tptr += 8; rptr += 8; continue; } } if (InsideTitle) { if (strsame (rptr, "", 7)) { InsideTitle = false; memset (tptr, MASK_TAG_CHAR, 7); tptr += 7; rptr += 7; continue; } } } if (*rptr == '<') { if (*((ULONGPTR)rptr) == '\n", SoftwareID); return (status); } /*****************************************************************************/ /* This function accepts a comma-separated list of (possibly wildcarded) file types (extensions, e.g. "TXT,TEXT,COM,C,PAS,FOR,RPT*") and a VMS file type (e.g. ".TXT;", ".TXT", "TXT"). It returns true if the file type is in the list, false if not. */ BOOL SameFileType ( char *TypeList, char *FileType ) { char ch; char *cptr, *sptr; /*********/ /* begin */ /*********/ if (Debug) fprintf (stdout, "SameFileType() |%s|%s|\n", FileType, TypeList); cptr = TypeList; while (*cptr) { for (sptr = cptr; *sptr && *sptr != ','; sptr++); ch = *sptr; *sptr = '\0'; if (Debug) fprintf (stdout, "|%s|%s|\n", FileType, cptr); if ((SearchTextString (FileType, cptr, false, false, NULL)) != NULL) { *sptr = ch; return (true); } if (*sptr = ch) sptr++; cptr = sptr; } return (false); } /*****************************************************************************/ /* String search allowing wildcard "*" (matching any multiple characters) and "%" (matching any single character). Returns NULL if not found or a pointer to start of matched string. Setting 'ImpliedWildcards' means the 'SearchFor' string is processed as if enclosed by '*' wildcard characters. 
*/ char* SearchTextString ( char *SearchIn, char *SearchFor, BOOL CaseSensitive, BOOL ImpliedWildcards, int *MatchedLengthPtr ) { char *cptr, *sptr, *inptr, *RestartCptr, *RestartInptr, *MatchPtr; /*********/ /* begin */ /*********/ if (Debug) fprintf (stdout, "SearchTextString() |%s|%s|\n", SearchIn, SearchFor); if (MatchedLengthPtr != NULL) *MatchedLengthPtr = 0; if (!*(cptr = SearchFor)) return (NULL); inptr = MatchPtr = SearchIn; if (ImpliedWildcards) { /* skip leading text up to first matching character (if any!) */ if (*cptr != '*' && *cptr != '%') { if (CaseSensitive) while (*inptr && *inptr != *cptr) inptr++; else while (*inptr && toupper(*inptr) != toupper(*cptr)) inptr++; if (Debug && !*inptr) fprintf (stdout, "1. NOT matched!\n"); if (!*inptr) return (NULL); cptr++; MatchPtr = inptr++; } } for (;;) { if (CaseSensitive) { while (*cptr && *inptr && *cptr == *inptr) { cptr++; inptr++; } } else { while (*cptr && *inptr && toupper(*cptr) == toupper(*inptr)) { cptr++; inptr++; } } if (ImpliedWildcards) { if (!*cptr) { if (Debug) fprintf (stdout, "1. matched!\n"); if (MatchedLengthPtr != NULL) *MatchedLengthPtr = inptr - MatchPtr; return (MatchPtr); } } else { if (!*cptr && !*inptr) { if (Debug) fprintf (stdout, "2. matched!\n"); if (MatchedLengthPtr != NULL) *MatchedLengthPtr = inptr - MatchPtr; return (MatchPtr); } if (*cptr != '*' && *cptr != '%') { if (Debug && !*inptr) fprintf (stdout, "3. NOT matched!\n"); return (NULL); } } if (*cptr != '*' && *cptr != '%') { if (!*inptr) { if (Debug) fprintf (stdout, "4. NOT matched!\n"); return (NULL); } cptr = SearchFor; MatchPtr = ++inptr; continue; } if (*cptr == '%') { /* single char wildcard processing */ if (!*inptr) break; cptr++; inptr++; continue; } /* asterisk wildcard matching */ while (*cptr == '*') cptr++; /* an asterisk wildcard at end matches all following */ if (!*cptr) { if (Debug) fprintf (stdout, "5. 
matched!\n"); while (*inptr) inptr++; if (MatchedLengthPtr != NULL) *MatchedLengthPtr = inptr - MatchPtr; return (MatchPtr); } /* note the current position in the string (first after the wildcard) */ RestartCptr = cptr; for (;;) { /* find first char in SearchIn matching char after wildcard */ if (CaseSensitive) while (*inptr && *cptr != *inptr) inptr++; else while (*inptr && toupper(*cptr) != toupper(*inptr)) inptr++; /* if did not find matching char in SearchIn being searched */ if (Debug && !*inptr) fprintf (stdout, "6. NOT matched!\n"); if (!*inptr) return (NULL); /* note the current position in SearchIn being searched */ RestartInptr = inptr; /* try to match the remainder of the string and SearchIn */ if (CaseSensitive) { while (*cptr && *inptr && *cptr == *inptr) { cptr++; inptr++; } } else { while (*cptr && *inptr && toupper(*cptr) == toupper(*inptr)) { cptr++; inptr++; } } /* if reached the end of both string and SearchIn - match! */ if (ImpliedWildcards) { if (!*cptr) { if (Debug) fprintf (stdout, "7. matched!\n"); if (MatchedLengthPtr != NULL) *MatchedLengthPtr = inptr - MatchPtr; return (MatchPtr); } } else { if (!*cptr && !*inptr) { if (Debug) fprintf (stdout, "8. matched!\n"); if (MatchedLengthPtr != NULL) *MatchedLengthPtr = inptr - MatchPtr; return (MatchPtr); } } /* break to the external loop if we encounter another wildcard */ if (*cptr == '*' || *cptr == '%') break; /* lets have another go */ cptr = RestartCptr; /* starting the character following the previous attempt */ inptr = MatchPtr = RestartInptr + 1; } } } /****************************************************************************/ /* Return an integer reflecting the major and minor version of VMS (e.g. 60, 61, 62, 70, 71, 72, etc.) 
*/

#ifdef ODS_EXTENDED

/*
Asks sys$getsyiw() for the SYI$_VERSION string (8 characters) and builds the
result from the characters at offsets 1 and 3 of that string.  Exits the image
with the system service status if the call fails.
*/

int GetVmsVersion ()
{
   static char  SyiVersion [16];

   static struct
   {
      short int  buf_len;
      short int  item;
      void  *buf_addr;
      unsigned short  *ret_len;
   }
   SyiItems [] =
   {
      { 8, SYI$_VERSION, &SyiVersion, 0 },
      { 0,0,0,0 }
   };

   int  status,
        tens,
        units,
        version;

   /*********/
   /* begin */
   /*********/

   if (Debug) fprintf (stdout, "GetVmsVersion()\n");

   status = sys$getsyiw (0, 0, 0, &SyiItems, 0, 0, 0);
   if (VMSnok (status)) exit (status);

   /* terminate immediately after the 8 characters requested */
   SyiVersion[8] = '\0';

   /* 48 is the character code of the digit zero */
   tens = SyiVersion[1] - 48;
   units = SyiVersion[3] - 48;
   version = (tens * 10) + units;

   if (Debug) fprintf (stdout, "|%s| %d\n", SyiVersion, version);
   return (version);
}

#endif /* ODS_EXTENDED */

/****************************************************************************/
/*
Case-insensitive string comparison returning true if the two strings are the
same, false if not.  A 'count' greater than zero limits the comparison to at
most that many characters, otherwise the strings are compared in full.
*/

BOOL strsame
(
char *sptr1,
char *sptr2,
int count
)
{
   int  difference;

   /*********/
   /* begin */
   /*********/

   if (count <= 0)
      difference = strcasecmp (sptr1, sptr2);
   else
      difference = strncasecmp (sptr1, sptr2, count);

   return (difference == 0);
}

/****************************************************************************/