'30', 'button' => FmtPageName(' $[Search] ', $pagename), 'searchlabel' => FmtPageName('$[Search for ]', $pagename), 'pageslabel' => FmtPageName('$[On pages]', $pagename), 'caselabel' => FmtPageName('$[Match case]', $pagename), 'phraselabel' => FmtPageName('$[Match phrase]', $pagename), 'wordlabel' => FmtPageName('$[Match whole word]', $pagename), 'regexlabel' => FmtPageName('$[Regular expression]', $pagename), )); // defaults array SDVA($TextExtractOpt, array ( 'markup' => 'cut', //code, text, source, on 'unit' => 'dsent', //page, para, line, sent, dline, dsent 'highlight'=> 'yellow', //background color, 'bold', 'none' 'linenum-color' => 'green', 'matchnum-color' => 'green', 'pagenum-color' => 'green', 'title' => 'Search results for ', //XL('Text Extract'), 'header' => 'full', 'phead' => 'link', 'linewrap' => 1, 'rowspacing' => '0.5em', 'case' => 0, 'phrase' => 0, 'regex' => 0, 'strict' => 1, 'serial' => 0, 'error' => 1, 'timer' => 0, 'pagenum' => 0, 'matchnum' => 0, 'linenum' => 0, 'textlinks' => 0, 'linktext' => 'blue', 'shorten' => 0, 'lwords' => 5, 'rwords' => 10, 'ellipsis' => '…', )); // main function for text extract processing function TextExtract($pagename, $list, $opt = NULL) { global $TextExtractOpt, $TEModeDefaults, $TextExtract, $TextExtractExclude, $FmtV, $HTMLStylesFmt, $KeepToken, $KPV, $PageListArgPattern; ##DEBUG echo "
LIST "; print_r($list); echo ""; foreach($opt as $k => $v) { if (is_array($v)) foreach($v as $kk =>$vv) $opt[$k][$kk] = stripmagic($vv); else $opt[$k] = stripmagic($v); } //internal arg array $par = array(); //start time StopWatch('TextExtract start'); if ($opt['stime']) $par['stime'] = $opt['stime']; else $par['stime'] = strtok(microtime(), ' ') + strtok(''); /*/set default options foreach ($TEModeDefaults as $mode => $ar ) { foreach ($ar as $k => $val) if (isset($opt['markup']) && $opt['markup']==$mode && !$opt[$k]) $opt[$k] = $val; }*/ $opt = array_merge($TextExtractOpt, $opt); switch ($opt['unit']) { case 'sentence': $opt['unit'] = 'sent'; break; case 'paragraph': $opt['unit'] = 'para'; break; case 'dline': $opt['unit'] = 'line'; $opt['double'] = 1; break; case 'dsent': $opt['unit'] = 'sent'; $opt['double'] = 1; break; } if($opt['markup']=='text') $opt['textlinks'] = 1; //for 'text' mode linksshown as text ##DEBUG echo "
OPT "; print_r($opt); echo ""; //input parameter check if (!in_array($opt['unit'], array('line','para','page','sent')) OR !in_array($opt['markup'], array('code','cut','source','text','on'))) return "%red%$[Error: check input parameters!]"; foreach((array)@$opt['+'] as $i) $opt[''][] = $i; if (!isset($opt['']) && !isset($opt['pattern'])) return '%red%$[Error: search term missing!]'; //term is regular expression if ($opt['regex']==1) { $par['pat'] = $pat = $par['pattern'] = $opt[''][0] = $opt['pattern']; //exclude various input patterns SDVA($TextExtractExclude, array("*","?","+","(",")","[","]","^","$","|","??","\\")); foreach($TextExtractExclude as $v) if($pat==$v) return '%red%$[Error: disallowed character input!]'; } //term is phrase else if ($opt['phrase']==1) { $par['pattern'] = $terms = implode(" ", $opt['']); #$pat = ($opt['word']==1)? '\\b'.$terms.'\\b' : $terms; $pat = ($opt['word']==1)? '(? $pt) #$opt[''][$i] = '\\b'.$pt.'\\b'; $opt[''][$i] = '(?pat: ".$pat; $HTMLStylesFmt['teimages'] = " .image {max-width:10em; } "; //always wrap lines when displaying preformatted 'source' code if ($opt['markup']=='source') $opt['linewrap'] = 1; // wrap lines of preformatted text and code if($opt['linewrap']==1) { # whitespace wrap (perhaps copy styles to css stylesheet) $HTMLStylesFmt['prewrap'] = " code, div.te-results pre, div.te-results code, code.escaped, pre.escaped { white-space: pre-wrap; padding-left: 1em; } "; } if($opt['rowspacing']!=0) { $HTMLStylesFmt['rowspacing'] = ".spacer { min-height: {$opt['rowspacing']};} p.vspace {height:0;}"; } //setting keep values here, and keeptokens directly in TEHighLight() //instead of calling Keep again and again switch ($opt['highlight']) { case 'none': $KPV['01-TE'] = $KPV['02-TE'] = ""; break; case 'bold': $KPV['01-TE'] = ""; $KPV['02-TE'] = ""; break; case '1': default: $KPV['01-TE'] = ""; $KPV['02-TE'] = ""; $HTMLStylesFmt['te-hilight'] = " .te-hilight { background-color: {$opt['highlight']}; } "; } $par['hitoklen'] = 2* 
(5 + 2 * strlen($KeepToken)); // 2* ( KeepToken-length + KPV-key-length + KeepToken-length ) $KPV['03-TE'] = "
NEW "; print_r($new); echo ""; //output text from array of rows, adding page prefix header (and footer) $out = ''; foreach ($new as $i => $ar) { //markup pageheader if($opt['phead']) $out .= MarkupToHTML($pagename, $new[$i]['phead']); //add vspace foreach($new[$i]['rows'] as $k => $r) { if(isset($new[$i]['rows'][$k])) { $new[$i]['rows'][$k] = TEVSpace($r, $par, $opt); //add vertical spacing } } //markup rows $rnew = implode("\n", $new[$i]['rows']); global $LinkFunctions; if ($opt['textlinks']==1) { $lf = $LinkFunctions; foreach($LinkFunctions as $k => $v) $LinkFunctions[$k] = 'TELinkText'; } $out .= ($opt['markup']=='source') ? "
".$rnew."
"
: MarkupToHTML($pagename, $rnew);
if ($opt['textlinks']==1) $LinkFunctions = $lf;
//markup pagefooter
if (isset($opt['pfoot']))
$out .= MarkupToHTML($pagename, $new[$i]['pfoot']);
}
//stop timer
TEStopwatch($par);
//make header and footer
$header = TEHeader($opt, $par);
$header = MarkupToHTML($pagename, $header);
$footer = TEFooter($opt, $par);
$footer = MarkupToHTML($pagename, $footer);
// NOTE(review): the next line is reproduced unchanged. It looks like the end of TextExtract()
// run together with the tail of the row-highlighting routine, with the text in between lost
// (hence the stray "OTHER " debug output) -- restore it from a clean copy before relying on it.
$out = $header."OTHER "; print_r($m[0]); echo ""; $k = 0; $mpos = array(); foreach($m[0] as $i => $v) { if (!preg_match("({$par['pat']})".$par['qi'], $v[0])) continue; if (isset($m[4]) && preg_match("/$LinkPattern/",$m[4][$i][0])) $item = $v[0]." "; else $item = $v[0]; $pos = $v[1] + $k * $par['hitoklen']; $row = substr_replace($row, $KeepToken."01-TE".$KeepToken.$item.$KeepToken."02-TE".$KeepToken, $pos, strlen($item)); $row = rtrim($row,'% '); $k++; $mpos[] = $pos; } if ($opt['shorten']>0 && $opt['markup']!='source') $row = TEShortenRow($row, $par, $opt); } return $row; } //}}}

//add vertical spacing to one result row
//returns the trimmed row for 'source' markup, the row unchanged while $HTMLPNewline is
//non-empty, otherwise the row followed by $par['vspace'] (shortened rows) or $par['br-tag']
function TEVSpace($row, $par, $opt) {
    global $HTMLPNewline;
    if ($opt['markup']=='source') return trim($row);
    if($HTMLPNewline !='') return $row;
    if($opt['shorten']>0) {
        $HTMLPNewline = '';
        return $row.$par['vspace'];
    }
    else return $row.$par['br-tag'];
} //}}}

//shorten row
//keeps only the words around each word that contains $KeepToken and puts $opt['ellipsis']
//where words were dropped; the row is split and re-joined on single spaces
function TEShortenRow($row, $par, $opt) {
    global $KeepToken;
    //number of words left and right of highlight
    $a = ($opt['shorten']>1) ? $opt['shorten'] : $opt['lwords'];
    $b = ($opt['shorten']>1) ? 2*$opt['shorten'] : $opt['rwords'];
    $hi = $new = array();
    $words = explode(' ', $row);
    //indexes of the words that carry a keep token
    foreach ($words as $i => $wd)
        if (strpos($wd, $KeepToken)!==false) $hi[] = $i;
    for ($i=0; $i < count($words); $i++) {
        foreach ($hi as $k => $n) {
            //word lies before the left window of highlight $n
            if (($n-$a) > $i) {
                if (($n-$a) == $i+1)
                    if (!isset($new[$i])) $new[$i] = $opt['ellipsis'];
                if (isset($new[$i-1]) && $new[$i-1]!=$opt['ellipsis']) $new[$i] = $opt['ellipsis'];
                continue 2;
            }
            //first word after the right window of the last highlight
            if ($i == end($hi)+$b+1) $new[$i] = $opt['ellipsis'];
            if ($i > $n+$b) continue;
            if(isset($hi[$k+1]) && $i==($hi[$k+1]-$a)) continue;
            if (isset($new[$i])) continue 2;
            $new[$i] = $words[$i];
            continue 2;
        }
    }
    $row = implode(' ', $new);
    return $row;
} //}}}

//make header
//returns the header markup chosen by $opt['header']; any value other than count/counter/
//all/full is used as a template for TEVarReplace()
//$opt is taken by reference: the all/full header also sets $opt['footer']
function TEHeader(&$opt, $par) {
    $cnt = $par['matchnum'];
    $out = "";
    if ($opt['header']) $out .= "(:div001 class='te-header':)\n";
    switch($opt['header']) {
        default:
            $out .= TEVarReplace($opt['header'], $par);
            break;
        case 'count':
        case 'counter':
            $out .= "'''$[Results:] $cnt'''";
            break;
        case 'all':
        case 'full':
            $time = ($opt['timer']) ? 'in '.$par['time'] : '';
            $pgs = ($par['listcnt']>1) ? '$[pages]' : '$[page]';
            $from = "$[from] {$par['listcnt']} $pgs $[searched]";
            if ($par['pagecnt']>1) $from = "$[on] {$par['pagecnt']} $[pages] ".$from;
            $out .= "[[#extracttop]]%lfloat%[+ '''{$opt['title']} %green%{$par['pattern']}%%''' +] %right%''{$cnt} $[results] {$from} {$time}''";
            $opt['footer'] = "%center% '''$[End of] {$opt['title']}''' [[#extracttop|$[(start)]]]";
            break;
    }
    if ($opt['header']) $out .= "\n(:div001end:)";
    return $out;
} //}}}

//make footer
//returns the te-footer div (only when a footer text is set and at least one page matched)
//followed, when $opt['error']==1, by at most one error message
function TEFooter($opt, $par) {
    $out = '';
    //$opt['footer'] may be absent: TEHeader() only sets it for the all/full header
    if (!empty($opt['footer']) && $par['pagecnt']>0) {
        $out .= "\n(:div002 class='te-footer':)".TEVarReplace($opt['footer'], $par)."\n(:div002end:)";
    }
    if($opt['error']==1) {
        //the two messages used to be assigned to the same variable one after the other,
        //so "Found no matches!" was always overwritten and never displayed
        if ($par['listcnt']==0) $out .= "\n%red%$[Error: no pages to be searched!]%%";
        elseif ($par['pagecnt']==0) $out .= "\n%red%$[Found no matches!]%%";
    }
    return $out;
} //}}}

//make page header
//returns the per-page header wrapped in a >>te-pageheader<< block; $opt['phead'] selects
//'link', 'linkmod' (adds last-modified details from PageVar) or a TEVarReplace() template
function TEPageHeader($pagename, $source, $opt, &$par) {
    $pnum = ($opt['pagenum']==1) ? ($par['pagenum']).". " : '';
    $out = "\n>>te-pageheader<<\n";
    if($opt['phead']=='link') {
        if($opt['pagenum']==1 && $opt['pagenum-color']!='')
            $out .= "'''%color={$opt['pagenum-color']}%{$pnum}%% [+ [[$source]] +]'''";
        else $out .= "'''[+ [[$source]] +]'''";
    }
    elseif($opt['phead']=='linkmod' ) {
        $lmod = PageVar($source,'$LastModified');
        $lmby = PageVar($source,'$LastModifiedBy');
        $out .= "%rfloat%''$[last modified by] [[~{$lmby}]] $[on] {$lmod}'' %left%'''%color={$opt['pagenum-color']}%{$pnum}%%[+ [[$source]] +]'''";
    }
    else {
        $out .= TEVarReplace($opt['phead'], $par);
    }
    $out .= "\n>><<\n";
    return $out;
} //}}

//make page footer
//returns $opt['pfoot'] preceded by a newline
function TEPageFooter($pagename, $source, $opt, &$par) {
    $out = "\n".$opt['pfoot'];
    return $out;
} //}}

//make results (line) numbers
//returns the Keep()-protected number prefix for a row, or '' when neither line numbers
//nor match numbers are requested
function TERowNumbers($opt, $par) {
    #show($par,'PAR');
    $new = '';
    if ($opt['linenum']==1) {
        if ($opt['pagenum']==1) {
            $new = Keep("{$par['pagenum']}.{$par['linenum']}. ",'T');
        }
        else $new = Keep("{$par['linenum']}. ",'T');
    }
    else if ($opt['matchnum']==1 && $par['pat']!=".") {
        if ($opt['pagenum']==1) {
            //a row holding several matches is numbered as a range
            if ($par['rowmatchcnt']>1) $num = ($par['prevpmnum']+1)."-".$par['pmatchnum'];
            else $num = $par['pmatchnum'];
            $new = Keep("{$par['pagenum']}.$num. ",'T');
        }
        else {
            if ($par['rowmatchcnt']>1) $num = ($par['prevmnum']+1)."-".$par['matchnum'];
            else $num = $par['matchnum'];
            // NOTE(review): unlike the three prefixes above, this literal contained a line
            // break in the file; it is kept (as \n) so the emitted text is unchanged -- confirm.
            $new = Keep("$num. \n",'T');
        }
    }
    return $new;
} //}}}

//substitution of pseudo template variables
//replaces every {$$key} in $text with the matching non-array value of $par
function TEVarReplace ($text, $par) {
    foreach($par as $k => $v) {
        if (is_array($v)) continue;
        $text = str_replace('{$$'.$k.'}' , $v, $text);
    }
    return $text;
} //}}}

//Link function to suppress links
// NOTE(review): the empty string pieces below look like remnants of markup that was lost
// from the literals; the statement is kept exactly as found.
function TELinkText($pagename,$imap,$path,$title,$txt,$fmt=NULL) {
    return "".$txt."".$title."";
} //}}}

//timer
//stores the time elapsed since $par['stime'] in $par['time'], formatted in seconds
function TEStopwatch(&$par) {
    $wtime = strtok(microtime(), ' ') + strtok('') - $par['stime'];
    $xtime = sprintf("%04.2f %s", $wtime, ''); //time in secs
    $par['time'] = $xtime." $[seconds]";
} //}}}

// markup (:extract ....:) search form
Markup('extractform', 'directives','/\\(:extract\\s*(.*?)\\s*:\\)/', "TEFormMarkup");
// extractor search form
//$m holds the markup match; $m[1] is the argument string of the directive
function TEFormMarkup($m) {
    global $ExtractFormOpt, $InputValues, $EnablePathInfo,$ExtractFormInputType;
    extract($GLOBALS['MarkupToHTML']);
    $opt = ParseArgs($m[1]);
    if (isset($opt['page'])) $hiddenpagefield = 1;
    $opt = array_merge((array)$ExtractFormOpt, @$_GET, (array)$opt);
    $opt['action'] = 'search';
    $opt['fmt'] = 'extract';
    $target = (isset($opt['target'])) ? MakePageName($pagename, $opt['target']) : $pagename;
    $opt['n'] = IsEnabled($EnablePathInfo, 0) ? '' : $target;
    foreach ($opt as $k => $v) {
        if ($v == '' || is_array($v)) continue;
        //escape apostrophes; the replacement had degraded to a plain apostrophe, which made
        //this call a no-op (PmWiki's own SearchBox escapes its values the same way)
        $v = str_replace("'", "&#039;", $v);
        $opt[$k] = $v;
        if (!isset($InputValues[$k])) $InputValues[$k] = $v;
    }
    //prefill the query field from pattern=; previously 'q' was set to '' first, which
    //made the pattern fallback unreachable
    if (!isset($InputValues['q']))
        $InputValues['q'] = isset($opt['pattern']) ? $opt['pattern'] : '';
    // NOTE(review): the else branch also clears a 'name' value that was already set --
    // left as found, confirm that this is intended.
    if (!isset($InputValues['name']) && isset($opt['defaultpage'])) $InputValues['name'] = $opt['defaultpage'];
    else $InputValues['name'] = '';
    $checkword = (isset($InputValues['word']))? "checked=1" : '';
    $checkcase = (isset($InputValues['case']))? "checked=1" : '';
    $checkphrase = (isset($InputValues['phrase']))? "checked=1" : '';
    $checkregex = (isset($InputValues['regex']))? "checked=1" : '';
    SDV($ExtractFormInputType, 'text');
    //form
    // NOTE(review): the form markup passed to FmtPageName() appears to be missing and the
    // statement is syntactically incomplete; kept exactly as found -- restore from a clean copy.
    $out = FmtPageName(""; return Keep($out);
} //}}}

## (extract ......) same as PowerTools (pagelist.... fmt=extract) [all pagelist parameters allowed]
$MarkupExpr['extract'] = 'MxTextExtract($pagename, $argp, $args)';
//builds pagelist options from the named arguments ($argp) and appends every positional
//argument ($args) to q as a double-quoted term, then returns the FmtPageList() output
//with runs of newlines collapsed
function MxTextExtract($pagename, $argp, $args) {
    StopWatch('extract start');
    unset($argp['#']);
    //start from a real array instead of creating $opt implicitly
    $opt = array('fmt' => 'extract');
    foreach((array)$argp as $k => $v) $opt[$k] = $v;
    foreach((array)$args as $k => $v) {
        //q is only created when there is something to append, as before, but without
        //appending to an undefined index
        if (!isset($opt['q'])) $opt['q'] = '';
        $opt['q'] .= ' "'.$v.'"';
    }
    $out = FmtPageList('$MatchList', $pagename, $opt, 0);
    $out = preg_replace("/[\n]+/s","\n",$out);
    StopWatch('extract end');
    return $out;
} //}}}

//fmt=extract for (:extract:) and (:pagelist:) and (:searchbox:)
SDV($FPLFormatOpt['extract'], array('fn' => 'FPLTextExtract'));
function FPLTextExtract($pagename, &$matches, $opt) {
    ##DEBUG echo "
#DEBUG (tail of the debug statement begun on the previous line) OPT "; print_r($opt); echo "";
    //body of FPLTextExtract(): prepares the search options, builds the page list with
    //MakePageList() and hands it to TextExtract()
    global $FmtV, $EnableStopWatch, $KeepToken, $KPV, $PageListFilters;
    $PageListFilters['PageListTermsTargets'] = -10; //not used
    $PageListFilters['TEListTermsTargets'] = 160; //used as alternative
    $EnableStopWatch = 1;
    StopWatch('TextExtract pagelist begin');
    $opt['stime'] = strtok(microtime(), ' ') + strtok('');
    //q may be absent (e.g. only pattern= given); treat that as an empty query
    $opt['q'] = ltrim(isset($opt['q']) ? $opt['q'] : '');
    //if search term contains terms in double quotes switch on 'text' option to remove all inline markup when searching
    if (preg_match('/\\".*\\"/',$opt['q'])) $opt['text'] = 1;
    if (@$opt[''])
        foreach ($opt[''] as $k => $v)
            $opt[''][$k] = htmlspecialchars_decode($v);
    //treat single . search term as request for regex 'all characters'
    if(isset($opt[''][0]) && $opt[''][0]=='.') $opt['regex'] = 1;
    if(isset($opt['pattern']) && $opt['pattern']=='.') $opt['regex'] = 1;
    //MakePageList() does not evaluate terms as regular expressions, so we save them for later
    if (@$opt['regex']==1) {
        //only rebuild the pattern when positional terms exist; with pattern= alone there is
        //nothing to implode and the explicit pattern must survive
        if (isset($opt['']) && is_array($opt[''])) $opt['pattern'] = implode(' ', $opt['']);
        unset($opt['']);
    }
    if (!isset($opt['name']) && isset($opt['page'])) $opt['name'] = $opt['page'];
    elseif (isset($opt['name']) && isset($opt['page'])) $opt['name'] .= ",".$opt['page'];
    if (isset($opt['name'])) unset($opt['page']);
    //allow search of anchor sections
    if (isset($opt['name'])) {
        if($sa=strpos($opt['name'],'#')) {
            $opt['section'] = strstr($opt['name'],'#');
            $opt['name'] = substr($opt['name'],0,$sa);
        }
    }
    //unset excludes for page matching, deal with them on unit basis later
    $excl = '';
    if(isset($opt['strict']) && $opt['strict']==1) {
        //unit may not have been given at this point; an absent unit counts as "not page"
        if (isset($opt['-']) && @$opt['unit']!='page') {
            $excl = $opt['-'];
            unset($opt['-']);
        }
    }
    //create page list by searching pages for search terms
    $list = MakePageList($pagename, $opt, 0);
    if (!isset($opt['-'])) $opt['-'] = $excl; //add excludes again
    #DEBUG echo "list after MakePageList "; print_r($list); echo "";
    //extract page subset according to 'count=' parameter
    if (@$opt['count'] && empty($opt['section'])) TESliceList($list, $opt);
    return TextExtract($pagename, $list, $opt);
} //}}}

//alternative for PageListTermsTargets with hook to TERemoveInlineMarkup for option 'text'
//this allows page matches to a search phrase even if part of the phrase is enclosed with inline markup
function TEListTermsTargets(&$list, &$opt, $pn, &$page) {
    global $FmtV;
    static $reindex = array();
    $fold = $GLOBALS['StrFoldFunction'];
    switch ($opt['=phase']) {
        case PAGELIST_PRE:
            $FmtV['$MatchSearched'] = count($list);
            $incl = array(); $excl = array();
            foreach((array)@$opt[''] as $i) { $incl[] = $fold($i); }
            foreach((array)@$opt['+'] as $i) { $incl[] = $fold($i); }
            foreach((array)@$opt['-'] as $i) { $excl[] = $fold($i); }
            $indexterms = PageIndexTerms($incl);
            foreach($incl as $i) {
                //use '$' as delimiter for terms made only of word characters, '/' otherwise
                $delim = (!preg_match('/[^\\w\\x80-\\xff]/', $i)) ? '$' : '/';
                $opt['=inclp'][] = $delim . preg_quote($i,$delim) . $delim .
'i';
            }
            if ($excl)
                $opt['=exclp'][] = '$'.implode('|', array_map('preg_quote',$excl)).'$i';
            if (@$opt['link']) {
                $link = MakePageName($pn, $opt['link']);
                $opt['=linkp'] = "/(^|,)$link(,|$)/i";
                $indexterms[] = " $link ";
            }
            if (@$opt['=cached']) return 0;
            if ($indexterms) {
                StopWatch("PageListTermsTargets begin count=".count($list));
                $xlist = PageIndexGrep($indexterms, true);
                $list = array_diff($list, $xlist);
                StopWatch("PageListTermsTargets end count=".count($list));
            }
            if (@$opt['=inclp'] || @$opt['=exclp'] || @$opt['=linkp'])
                return PAGELIST_ITEM|PAGELIST_POST;
            return 0;

        case PAGELIST_ITEM:
            if (!$page) { $page = ReadPage($pn, READPAGE_CURRENT); $opt['=readc']++; }
            if (!$page) return 0;
            if (@$opt['=linkp'] && !preg_match($opt['=linkp'], @$page['targets'])) {
                $reindex[] = $pn;
                return 0;
            }
            if (@$opt['=inclp'] || @$opt['=exclp']) {
                $text = $fold($pn."\n".@$page['targets']."\n".@$page['text']);
                if (isset($opt['text']) && $opt['text']==1) $text = TERemoveInlineMarkup($text);
                foreach((array)@$opt['=exclp'] as $i)
                    if (preg_match($i, $text)) return 0;
                foreach((array)@$opt['=inclp'] as $i)
                    if (!preg_match($i, $text)) {
                        if ($i[0] == '$') $reindex[] = $pn;
                        return 0;
                    }
            }
            return 1;

        case PAGELIST_POST:
            if ($reindex) PageIndexQueueUpdate($reindex);
            $reindex = array();
            return 0;
    }
} //}}}

//slice list for count= option
//reduces $list in place to the range CalcRange() derives from $opt['count']; a range
//whose end lies before its start yields the slice in reverse order
function TESliceList(&$list, $opt) {
    list($r0, $r1) = CalcRange($opt['count'], count($list));
    if ($r1 < $r0)
        $list = array_reverse(array_slice($list, $r1-1, $r0-$r1+1));
    else
        $list = array_slice($list, $r0-1, $r1-$r0+1);
} //}}}

//sort by match count and subsort by name
//$new is a list of arrays with 'pmatchcnt' and 'name' keys; it is reordered in place by
//descending match count, entries with equal counts ordered by name
function TESort(&$new) {
    usort($new,"TESortByMatchCnt");
    $anew = $temp = array();
    $cnt = count($new);
    for ($i=0; $i<$cnt; $i++) {
        $temp[] = $new[$i];
        //test for the last entry first: looking at $new[$i+1] before checking the bound
        //read past the end of the array on the final iteration
        if ($i+1==$cnt || $new[$i]['pmatchcnt'] > $new[$i+1]['pmatchcnt']) {
            //a group of equal counts is complete: order it by name and append it
            if (count($temp)>1) usort($temp, "TESortByName");
            $anew = array_merge($anew, $temp);
            $temp = array();
        }
    }
    $new = $anew;
} //}}}

//is_countable substitute for php versions <7.3
if (!function_exists('is_countable')) {
    function is_countable($c) {
        return is_array($c) || $c instanceof Countable;
    }
}

//sort helper functions
//descending by 'pmatchcnt'
function TESortByMatchCnt($a, $b) {
    return $b['pmatchcnt'] - $a['pmatchcnt'];
}
//natural-order, case-insensitive by 'name'
function TESortByName($a, $b) {
    return strnatcasecmp($a['name'], $b['name']);
}
//EOF