ajout de la partie slam dans le dossier web
This commit is contained in:
		
							
								
								
									
										204
									
								
								ap23/web/doku/inc/Utf8/Clean.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										204
									
								
								ap23/web/doku/inc/Utf8/Clean.php
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,204 @@ | ||||
| <?php | ||||
|  | ||||
| namespace dokuwiki\Utf8; | ||||
|  | ||||
| /** | ||||
|  * Methods to assess and clean UTF-8 strings | ||||
|  */ | ||||
| class Clean | ||||
| { | ||||
|     /** | ||||
|      * Checks if a string contains 7bit ASCII only | ||||
|      * | ||||
|      * @author Andreas Haerter <andreas.haerter@dev.mail-node.com> | ||||
|      * | ||||
|      * @param string $str | ||||
|      * @return bool | ||||
|      */ | ||||
|     public static function isASCII($str) | ||||
|     { | ||||
|         return (preg_match('/(?:[^\x00-\x7F])/', $str) !== 1); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Tries to detect if a string is in Unicode encoding | ||||
|      * | ||||
|      * @author <bmorel@ssi.fr> | ||||
|      * @link   http://php.net/manual/en/function.utf8-encode.php | ||||
|      * | ||||
|      * @param string $str | ||||
|      * @return bool | ||||
|      */ | ||||
|     public static function isUtf8($str) | ||||
|     { | ||||
|         $len = strlen($str); | ||||
|         for ($i = 0; $i < $len; $i++) { | ||||
|             $b = ord($str[$i]); | ||||
|             if ($b < 0x80) continue; # 0bbbbbbb | ||||
|             elseif (($b & 0xE0) === 0xC0) $n = 1; # 110bbbbb | ||||
|             elseif (($b & 0xF0) === 0xE0) $n = 2; # 1110bbbb | ||||
|             elseif (($b & 0xF8) === 0xF0) $n = 3; # 11110bbb | ||||
|             elseif (($b & 0xFC) === 0xF8) $n = 4; # 111110bb | ||||
|             elseif (($b & 0xFE) === 0xFC) $n = 5; # 1111110b | ||||
|             else return false; # Does not match any model | ||||
|  | ||||
|             for ($j = 0; $j < $n; $j++) { # n bytes matching 10bbbbbb follow ? | ||||
|                 if ((++$i === $len) || ((ord($str[$i]) & 0xC0) !== 0x80)) | ||||
|                     return false; | ||||
|             } | ||||
|         } | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Strips all high byte chars | ||||
|      * | ||||
|      * Returns a pure ASCII7 string | ||||
|      * | ||||
|      * @author Andreas Gohr <andi@splitbrain.org> | ||||
|      * | ||||
|      * @param string $str | ||||
|      * @return string | ||||
|      */ | ||||
|     public static function strip($str) | ||||
|     { | ||||
|         $ascii = ''; | ||||
|         $len = strlen($str); | ||||
|         for ($i = 0; $i < $len; $i++) { | ||||
|             if (ord($str[$i]) < 128) { | ||||
|                 $ascii .= $str[$i]; | ||||
|             } | ||||
|         } | ||||
|         return $ascii; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Removes special characters (nonalphanumeric) from a UTF-8 string | ||||
|      * | ||||
|      * This function adds the controlchars 0x00 to 0x19 to the array of | ||||
|      * stripped chars (they are not included in $UTF8_SPECIAL_CHARS) | ||||
|      * | ||||
|      * @author Andreas Gohr <andi@splitbrain.org> | ||||
|      * | ||||
|      * @param  string $string The UTF8 string to strip of special chars | ||||
|      * @param  string $repl Replace special with this string | ||||
|      * @param  string $additional Additional chars to strip (used in regexp char class) | ||||
|      * @return string | ||||
|      */ | ||||
|     public static function stripspecials($string, $repl = '', $additional = '') | ||||
|     { | ||||
|         static $specials = null; | ||||
|         if ($specials === null) { | ||||
|             $specials = preg_quote(Table::specialChars(), '/'); | ||||
|         } | ||||
|  | ||||
|         return preg_replace('/[' . $additional . '\x00-\x19' . $specials . ']/u', $repl, $string); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Replace bad bytes with an alternative character | ||||
|      * | ||||
|      * ASCII character is recommended for replacement char | ||||
|      * | ||||
|      * PCRE Pattern to locate bad bytes in a UTF-8 string | ||||
|      * Comes from W3 FAQ: Multilingual Forms | ||||
|      * Note: modified to include full ASCII range including control chars | ||||
|      * | ||||
|      * @author Harry Fuecks <hfuecks@gmail.com> | ||||
|      * @see http://www.w3.org/International/questions/qa-forms-utf-8 | ||||
|      * | ||||
|      * @param string $str to search | ||||
|      * @param string $replace to replace bad bytes with (defaults to '?') - use ASCII | ||||
|      * @return string | ||||
|      */ | ||||
|     public static function replaceBadBytes($str, $replace = '') | ||||
|     { | ||||
|         $UTF8_BAD = | ||||
|             '([\x00-\x7F]' .                          # ASCII (including control chars) | ||||
|             '|[\xC2-\xDF][\x80-\xBF]' .               # non-overlong 2-byte | ||||
|             '|\xE0[\xA0-\xBF][\x80-\xBF]' .           # excluding overlongs | ||||
|             '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' .    # straight 3-byte | ||||
|             '|\xED[\x80-\x9F][\x80-\xBF]' .           # excluding surrogates | ||||
|             '|\xF0[\x90-\xBF][\x80-\xBF]{2}' .        # planes 1-3 | ||||
|             '|[\xF1-\xF3][\x80-\xBF]{3}' .            # planes 4-15 | ||||
|             '|\xF4[\x80-\x8F][\x80-\xBF]{2}' .        # plane 16 | ||||
|             '|(.{1}))';                               # invalid byte | ||||
|         ob_start(); | ||||
|         while (preg_match('/' . $UTF8_BAD . '/S', $str, $matches)) { | ||||
|             if (!isset($matches[2])) { | ||||
|                 echo $matches[0]; | ||||
|             } else { | ||||
|                 echo $replace; | ||||
|             } | ||||
|             $str = substr($str, strlen($matches[0])); | ||||
|         } | ||||
|         return ob_get_clean(); | ||||
|     } | ||||
|  | ||||
|  | ||||
|     /** | ||||
|      * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents | ||||
|      * | ||||
|      * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1) | ||||
|      * letters. Default is to deaccent both cases ($case = 0) | ||||
|      * | ||||
|      * @author Andreas Gohr <andi@splitbrain.org> | ||||
|      * | ||||
|      * @param string $string | ||||
|      * @param int $case | ||||
|      * @return string | ||||
|      */ | ||||
|     public static function deaccent($string, $case = 0) | ||||
|     { | ||||
|         if ($case <= 0) { | ||||
|             $string = strtr($string, Table::lowerAccents()); | ||||
|         } | ||||
|         if ($case >= 0) { | ||||
|             $string = strtr($string, Table::upperAccents()); | ||||
|         } | ||||
|         return $string; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Romanize a non-latin string | ||||
|      * | ||||
|      * @author Andreas Gohr <andi@splitbrain.org> | ||||
|      * | ||||
|      * @param string $string | ||||
|      * @return string | ||||
|      */ | ||||
|     public static function romanize($string) | ||||
|     { | ||||
|         if (self::isASCII($string)) return $string; //nothing to do | ||||
|  | ||||
|         return strtr($string, Table::romanization()); | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * adjust a byte index into a utf8 string to a utf8 character boundary | ||||
|      * | ||||
|      * @author       chris smith <chris@jalakai.co.uk> | ||||
|      * | ||||
|      * @param string $str utf8 character string | ||||
|      * @param int $i byte index into $str | ||||
|      * @param bool $next direction to search for boundary, false = up (current character) true = down (next character) | ||||
|      * @return int byte index into $str now pointing to a utf8 character boundary | ||||
|      */ | ||||
|     public static function correctIdx($str, $i, $next = false) | ||||
|     { | ||||
|  | ||||
|         if ($i <= 0) return 0; | ||||
|  | ||||
|         $limit = strlen($str); | ||||
|         if ($i >= $limit) return $limit; | ||||
|  | ||||
|         if ($next) { | ||||
|             while (($i < $limit) && ((ord($str[$i]) & 0xC0) === 0x80)) $i++; | ||||
|         } else { | ||||
|             while ($i && ((ord($str[$i]) & 0xC0) === 0x80)) $i--; | ||||
|         } | ||||
|  | ||||
|         return $i; | ||||
|     } | ||||
|  | ||||
| } | ||||
		Reference in New Issue
	
	Block a user