= 0x81 and ord($string[$i]) <= 0xfe) { $h = ord($string[$i]); if (isset($string[$i + 1])) { $i ++; $l = ord($string[$i]); if (isset(self::$_PinyinTable[$h][$l])) { array_push($flow, self::$_PinyinTable[$h][$l]); } else { array_push($flow, $h); array_push($flow, $l); } } else { array_push($flow, ord($string[$i])); } } else { array_push($flow, ord($string[$i])); } } $pinyin = ''; if (count($flow) > 0) { if (! $permutationAndCombination) { foreach ($flow as $val) { if (is_array($val)) { $pinyin .= ucfirst($val[0]); } else { $pinyin .= chr($val); } } } else { foreach ($flow as $key => $val) { if (! is_array($val)) { $flow[$key] = array( chr($val)); } else { $flow[$key] = array_map('ucfirst', $val); } } $pc = Zeed_Util_Zh::permutationAndCombination($flow); $pinyin = array(); foreach ($pc as $p) { $pinyin[] = implode('', $p); } } } return $pinyin; } /** * 将指定的字符串转为GBK * * 编码 代码页 简介 * GB2312 CP20936 收录文字6763个(简体中文) * GBK CP936 收录文字21003个(包括简体、繁体、日文、朝鲜文。兼容GB2312。) * GB18030 CP54936 收录文字27533个(包括简体、繁体、少数民族文字,日文、朝鲜文。兼容GBK,不兼容BIG5。) * BIG5 CP950 收录文字13053个(繁体中文) * BIG-5 * 《Unicode、GB2312、GBK和GB18030中的汉字》: http://www.fmddlmyy.cn/text24.html * * @param string $string * @return string */ public static function convert2GBK($string) { if (NULL != $cs = mb_detect_encoding($string, array( 'UTF-8', 'GBK', 'BIG5'))) { if ($cs != 'CP936' || $cs != 'CP54936') { $string = iconv($cs, 'GBK', $string); } } return $string; } /** * 判断中文字符集(UTF-8/GBK/BIG5) * * http://php.net/manual/en/mbstring.supported-encodings.php * http://www.gnu.org/software/libiconv/ * http://www.unicode.org/ * http://www.fmddlmyy.cn/text24.html * * @param string $string * @return unknown */ public static function isUGB($string) { if (function_exists('mb_detect_encoding')) { if (NULL != $cs = mb_detect_encoding($string, array( 'UTF-8', 'BIG5', 'GBK'))) { switch($cs) { case 'CP20936' : return 'GBK'; case 'CP936' : return 'GBK'; case 'CP54936' : return 'GBK'; case 'CP950' : return 'BIG5'; case 'BIG-5' : return 'BIG5'; case 'UTF-8' : return 'UTF-8'; default : return $cs; } } return NULL; } return self::_isUGB($string); } /** * 编码 第一字节 第二字节 * GB2312 0xB0-0xF7(176-247) 0xA0-0xFE(160-254) * GBK 0x81-0xFE(129-254) 0x40-0xFE(64-254) * BIG5 0x81-0xFE(129-255) 0x40-0x7E(64-126),0xA1-0xFE(161-254) * * 一般是这样辨别GBK/BIG5的 * 1、GBK的内码的两个字节都是从A0H-FEH之间的; * 2、BIG5的内码的第一个字节是80H-FFH,第二个字节是00H-FFH; * * @param string $strtext * @return string 返回:UTF-8/GBK/BIG5/null */ public static function _isUGB($string) { $UGB = null; $length = strlen($string); for($i = 0; $i < $length; $i ++) { if (($ch1 = ord($string[$i])) > 0xE0) { // UTF-8 return "UTF-8"; } elseif ($ch1 >= 0x81) { // 中文 $ch2 = ord($string[$i + 1]); /** if ($ch1 >= 0xB0 && $ch1 <= 0xF7 && $ch2 >= 0xA0 && $ch2 <= 0xFE) { // GB2312 $GB2312found = true; } else { $GB2312found = false; } */ if ($ch1 >= 0x81 && $ch1 <= 0xFE && (($ch2 >= 0x40 && $ch2 <= 0x7E) || ($ch2 >= 0xA1 && $ch2 <= 0xFE))) { //BIG5 $BIG5found = true; } else { $BIG5found = false; } if ($ch1 >= 0x81 && $ch1 <= 0xFE && $ch2 >= 0x40 && $ch2 <= 0xFE) { // GBK $GBKfound = true; } else { $GBKfound = false; } if ($BIG5found && $GBKfound) { if ($ch1 > 0xA0 && $ch1 < 0xFE && $ch2 > 0xA0 && $ch2 < 0xFE) { // GBK汉字两个字节都是从A0H-FEH之间的 $UGB = 'GBK'; } if ($ch2 < 0x7F) { // 看第二个字节是否小于0x7F,如果是的的话,一般是BIG5。 return 'BIG5'; } // 检查下一个字 $i ++; continue; } else { return $BIG5found ? 'BIG5' : 'GBK'; } } } return $UGB; } /** * 废弃 */ private static function _isUGB_deprecated($strtext) { $UGB = NULL; $length = strlen($strtext); for($i = 0; $i < $length; $i ++) { //先判断UTF-8,UTF-8的是三个字节, GBK,BIG5是两个字节,可以分离 if (ord(substr($strtext, $i)) > 0xE0) { $UGB = "UTF-8"; break; } elseif (ord(substr($strtext, $i)) > 0xA1) { $UGB = "BIG5"; break; } elseif (ord(substr($strtext, $i)) > 0x80) { $UGB = "GBK"; break; } } return $UGB; } /** * From http://w3.org/International/questions/qa-forms-utf-8.html */ public static function isUTF8($string) { return preg_match('%^(?: [\x09\x0A\x0D\x20-\x7E] # ASCII | [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3 | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15 | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16 )*$%xs', $string); } /** * 要解决的数学问题:算出C(a,1) * C(b, 1) * ... * C(n, 1)的组合情况,其中C(n, 1)代表从n个元素里任意取一个元素 * * 要解决的实际问题样例:某年级有m个班级,每个班的人数不同,现在要从每个班里抽选一个人组成一个小组, * 由该小组来代表该年级参加学校的某次活动,请给出所有可能的组合 * * 需要进行排列组合的数组 * 数组说明:该数组是一个二维数组,第一维索引代表班级编号,第二维索引代表学生编号 * * @param array $CombinList 二维数组 * @return array */ public static function permutationAndCombination($CombinList) { /* $CombinList = array( 1 => array( "Student10", "Student11"), 2 => array( "Student20", "Student21", "Student22"), 3 => array( "Student30"), 4 => array( "Student40", "Student41", "Student42", "Student43")); */ /* 计算C(a,1) * C(b, 1) * ... * C(n, 1)的值 */ $CombineCount = 1; foreach ($CombinList as $Value) { $CombineCount *= count($Value); } $Result = array(); $RepeatTime = $CombineCount; foreach ($CombinList as $ClassNo => $StudentList) { // $StudentList中的元素在拆分成组合后纵向出现的最大重复次数 $RepeatTime = $RepeatTime / count($StudentList); $StartPosition = 1; // 开始对每个班级的学生进行循环 foreach ($StudentList as $Student) { $TempStartPosition = $StartPosition; $SpaceCount = $CombineCount / count($StudentList) / $RepeatTime; for($J = 1; $J <= $SpaceCount; $J ++) { for($I = 0; $I < $RepeatTime; $I ++) { $Result[$TempStartPosition + $I][$ClassNo] = $Student; } $TempStartPosition += $RepeatTime * count($StudentList); } $StartPosition += $RepeatTime; } } return $Result; } } // End ^ LF ^ UTF-8