0) {
while (list($key) = @each($tags_to_extract)) {
$match_part .= "|" . $tags_to_extract[$key];
}
$match_part = substr($match_part, 1);
} else {
// else we use the default extraction
$match_part = "href|src|url|location|codebase|background|data|profile|action|open";
}
// 1. <a ...>LINKTEXT</a> (well formed, with the closing </a> at the end)
// Get the link AND the linktext from these tags
// This has to be done FIRST !!
preg_match_all("/<[ ]{0,}a[ \n\r][^<>]{0,}(?<= |\n|\r)(?:" . $match_part . ")[ \n\r]{0,}=[ \n\r]{0,}[\"|']{0,1}([^\"'>< ]{0,})[^<>]{0,}>((?:(?!<[ \n\r]*\/a[ \n\r]*>).)*)<[ \n\r]*\/a[ \n\r]*>/ is", $source, $regs);
// regs[0] -> complete tags
// regs[1] -> the links (raw)
// regs[2] -> linktext
for ($x = 0; $x < count($regs[1]); $x ++) {
$tmp_array["link_raw"] = trim($regs[1][$x]);
$tmp_array["linktext"] = $regs[2][$x];
$tmp_array["linkcode"] = trim($regs[0][$x]);
$map_key = $tmp_array["link_raw"];
if (! isset($map_array[$map_key])) {
$target_array[] = $tmp_array;
$map_array[$map_key] = true;
}
}
// Now we "preg" all other matches
// 2. all like <..href="..."> <..src=".."> and so on
$pregs[] = "/<[^<>]{0,}[ \n\r](?:" . $match_part . ")[ \n\r]{0,}=[ \n\r]{0,}[\"|']{0,1}([^\"'>< ]{0,})[^<>]{0,}>/ is";
// Now, if $aggressive_mode is set to true, we look for some
// other things
if ($aggressive_mode == true) {
// Everything inside OR outside a tag
// "=" or "(" after tag
$pregs[] = "/[ \.:;](?:" . $match_part . ")[ \n\r]{0,}[=|\(][ \n\r]{0,}[\"|']{0,1}([^\"'>< ;]{0,})['\"<> ;]/ is";
// Stuff like ..open("...") is currently covered by the expression above
// (via its "(" alternative). The dedicated pattern below slowed down the
// whole thing, although it returned better "linkcode".
// $pregs[]="/(?:".$match_part.")[ \n\r]{0,}\([ \n\r]{0,}(?:\"|'){1}([^\"'><\n ]{0,})(\"|')[^)]*\)/ is";
}
// Now execute the pregs
for ($x = 0; $x < count($pregs); $x ++) {
unset($regs);
preg_match_all($pregs[$x], $source, $regs);
for ($y = 0; $y < count($regs[1]); $y ++) {
unset($tmp_array);
$tmp_array["link_raw"] = trim($regs[1][$y]);
$tmp_array["linkcode"] = trim($regs[0][$y]);
$tmp_array["linktext"] = "";
$map_key = $tmp_array["link_raw"];
if (! isset($map_array[$map_key])) {
$target_array[] = $tmp_array;
$map_array[$map_key] = true;
}
}
}
return $target_array;
}
}
// End ^ LF ^ encoding