* @copyright (c) 2001-2006 Bitflux GmbH * * @param string string to sanitize * @return string */ public static function xss_clean($str) { // http://svn.bitflux.ch/repos/public/popoon/trunk/classes/externalinput.php // +----------------------------------------------------------------------+ // | Copyright (c) 2001-2006 Bitflux GmbH | // +----------------------------------------------------------------------+ // | Licensed under the Apache License, Version 2.0 (the "License"); | // | you may not use this file except in compliance with the License. | // | You may obtain a copy of the License at | // | http://www.apache.org/licenses/LICENSE-2.0 | // | Unless required by applicable law or agreed to in writing, software | // | distributed under the License is distributed on an "AS IS" BASIS, | // | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | // | implied. See the License for the specific language governing | // | permissions and limitations under the License. | // +----------------------------------------------------------------------+ // | Author: Christian Stocker | // +----------------------------------------------------------------------+ // // Kohana Modifications: // * Changed double quotes to single quotes, changed indenting and spacing // * Removed magic_quotes stuff // * Increased regex readability: // * Used delimeters that aren't found in the pattern // * Removed all unneeded escapes // * Deleted U modifiers and swapped greediness where needed // * Increased regex speed: // * Made capturing parentheses non-capturing where possible // * Removed parentheses where possible // * Split up alternation alternatives // * Made some quantifiers possessive // Fix &entity\n; $str = str_replace(array( '&', '<', '>'), array( '&amp;', '&lt;', '&gt;'), $str); $str = preg_replace('/(&#*\w+)[\x00-\x20]+;/u', '$1;', $str); $str = preg_replace('/(&#x*[0-9A-F]+);*/iu', '$1;', $str); $str = html_entity_decode($str, ENT_COMPAT, 'UTF-8'); // Remove any attribute starting with "on" or xmlns $str = preg_replace('#(<[^>]+?[\x00-\x20"\'])(?:on|xmlns)[^>]*+>#iu', '$1>', $str); // Remove javascript: and vbscript: protocols $str = preg_replace('#([a-z]*)[\x00-\x20]*=[\x00-\x20]*([`\'"]*)[\x00-\x20]*j[\x00-\x20]*a[\x00-\x20]*v[\x00-\x20]*a[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2nojavascript...', $str); $str = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*v[\x00-\x20]*b[\x00-\x20]*s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:#iu', '$1=$2novbscript...', $str); $str = preg_replace('#([a-z]*)[\x00-\x20]*=([\'"]*)[\x00-\x20]*-moz-binding[\x00-\x20]*:#u', '$1=$2nomozbinding...', $str); // Only works in IE: $str = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?expression[\x00-\x20]*\([^>]*+>#i', '$1>', $str); $str = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?behaviour[\x00-\x20]*\([^>]*+>#i', '$1>', $str); $str = preg_replace('#(<[^>]+?)style[\x00-\x20]*=[\x00-\x20]*[`\'"]*.*?s[\x00-\x20]*c[\x00-\x20]*r[\x00-\x20]*i[\x00-\x20]*p[\x00-\x20]*t[\x00-\x20]*:*[^>]*+>#iu', '$1>', $str); // Remove namespaced elements (we do not need them) $str = preg_replace('#]*+>#i', '', $str); do { // Remove really unwanted tags $old = $str; $str = preg_replace('#]*+>#i', '', $str); } while ($old !== $str); return $str; } /** * Remove image tags from a string. * * @param string string to sanitize * @return string */ public static function strip_image_tags($str) { return preg_replace('#\s]*)["\']?[^>]*)?>#is', '$1', $str); } /** * Remove PHP tags from a string. * * @param string string to sanitize * @return string */ public static function encode_php_tags($str) { return str_replace(array( ''), array( '<?', '?>'), $str); } } // End ^ LF ^ UTF-8