0) { while (list($key) = @each($tags_to_extract)) { $match_part .= "|" . $tags_to_extract[$key]; } $match_part = substr($match_part, 1); } else { // else we use the default extraction $match_part = "href|src|url|location|codebase|background|data|profile|action|open"; } // 1. LINKTEXT (well formed with at the end) // Get the link AND the linktext from these tags // This has to be done FIRST !! preg_match_all("/<[ ]{0,}a[ \n\r][^<>]{0,}(?<= |\n|\r)(?:" . $match_part . ")[ \n\r]{0,}=[ \n\r]{0,}[\"|']{0,1}([^\"'>< ]{0,})[^<>]{0,}>((?:(?!<[ \n\r]*\/a[ \n\r]*>).)*)<[ \n\r]*\/a[ \n\r]*>/ is", $source, $regs); // regs[0] -> complete tags // regs[1] -> the links (raw) // regs[2] -> linktext for ($x = 0; $x < count($regs[1]); $x ++) { $tmp_array["link_raw"] = trim($regs[1][$x]); $tmp_array["linktext"] = $regs[2][$x]; $tmp_array["linkcode"] = trim($regs[0][$x]); $map_key = $tmp_array["link_raw"]; if (! isset($map_array[$map_key])) { $target_array[] = $tmp_array; $map_array[$map_key] = true; } } // Now we "preg" all other matches // 2. all like <..href="..."> <..src=".."> and so on $pregs[] = "/<[^<>]{0,}[ \n\r](?:" . $match_part . ")[ \n\r]{0,}=[ \n\r]{0,}[\"|']{0,1}([^\"'>< ]{0,})[^<>]{0,}>/ is"; // Now, if agressive_mode is set to true, we look for some // other things if ($aggressive_mode == true) { // Everyhtnig inside OR outside a tag // "=" or "(" after tag $pregs[] = "/[ \.:;](?:" . $match_part . ")[ \n\r]{0,}[=|\(][ \n\r]{0,}[\"|']{0,1}([^\"'>< ;]{0,})['\"<> ;]/ is"; // Stuff like ..open="("...") // currently in the expression above // slowed down the whole thing..but returned better "linkcode" // $pregs[]="/(?:".$match_part.")[ \n\r]{0,}\([ \n\r]{0,}(?:\"|'){1}([^\"'><\n ]{0,})(\"|')[^)]*\)/ is"; } // Now execute the pregs for ($x = 0; $x < count($pregs); $x ++) { unset($regs); preg_match_all($pregs[$x], $source, $regs); for ($y = 0; $y < count($regs[1]); $y ++) { unset($tmp_array); $tmp_array["link_raw"] = trim($regs[1][$y]); $tmp_array["linkcode"] = trim($regs[0][$y]); $tmp_array["linktext"] = ""; $map_key = $tmp_array["link_raw"]; if (! isset($map_array[$map_key])) { $target_array[] = $tmp_array; $map_array[$map_key] = true; } } } return $target_array; } } // End ^ LF ^ encoding