ajout de la partie slam dans le dossier web

2022-03-10 11:56:26 +01:00
parent 31d3052792
commit e375c4f088
4847 changed files with 325719 additions and 0 deletions
--- a/ap23/web/doku/inc/Utf8/Asian.php
+++ b/ap23/web/doku/inc/Utf8/Asian.php
@@ -0,0 +1,99 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Methods and constants to handle Asian "words"
+ *
+ * This uses a crude regexp to determine which parts of an Asian string should be treated as words.
+ * This is necessary because in some Asian languages a single unicode char represents a whole idea
+ * without spaces separating them.
+ */
+class Asian
+{
+
+    /**
+     * This defines a non-capturing group for the use in regular expressions to match any asian character that
+     * needs to be treated as a word. Uses the Unicode-Ranges for Asian characters taken from
+     * http://en.wikipedia.org/wiki/Unicode_block
+     */
+    const REGEXP =
+        '(?:' .
+
+        '[\x{0E00}-\x{0E7F}]' . // Thai
+
+        '|' .
+
+        '[' .
+        '\x{2E80}-\x{3040}' .  // CJK -> Hangul
+        '\x{309D}-\x{30A0}' .
+        '\x{30FD}-\x{31EF}\x{3200}-\x{D7AF}' .
+        '\x{F900}-\x{FAFF}' .  // CJK Compatibility Ideographs
+        '\x{FE30}-\x{FE4F}' .  // CJK Compatibility Forms
+        "\xF0\xA0\x80\x80-\xF0\xAA\x9B\x9F" . // CJK Extension B
+        "\xF0\xAA\x9C\x80-\xF0\xAB\x9C\xBF" . // CJK Extension C
+        "\xF0\xAB\x9D\x80-\xF0\xAB\xA0\x9F" . // CJK Extension D
+        "\xF0\xAF\xA0\x80-\xF0\xAF\xAB\xBF" . // CJK Compatibility Supplement
+        ']' .
+
+        '|' .
+
+        '[' .                // Hiragana/Katakana (can be two characters)
+        '\x{3042}\x{3044}\x{3046}\x{3048}' .
+        '\x{304A}-\x{3062}\x{3064}-\x{3082}' .
+        '\x{3084}\x{3086}\x{3088}-\x{308D}' .
+        '\x{308F}-\x{3094}' .
+        '\x{30A2}\x{30A4}\x{30A6}\x{30A8}' .
+        '\x{30AA}-\x{30C2}\x{30C4}-\x{30E2}' .
+        '\x{30E4}\x{30E6}\x{30E8}-\x{30ED}' .
+        '\x{30EF}-\x{30F4}\x{30F7}-\x{30FA}' .
+        '][' .
+        '\x{3041}\x{3043}\x{3045}\x{3047}\x{3049}' .
+        '\x{3063}\x{3083}\x{3085}\x{3087}\x{308E}\x{3095}-\x{309C}' .
+        '\x{30A1}\x{30A3}\x{30A5}\x{30A7}\x{30A9}' .
+        '\x{30C3}\x{30E3}\x{30E5}\x{30E7}\x{30EE}\x{30F5}\x{30F6}\x{30FB}\x{30FC}' .
+        '\x{31F0}-\x{31FF}' .
+        ']?' .
+        ')';
+
+
+    /**
+     * Tell whether a term holds at least one Asian word character
+     *
+     * @param string $term the text to inspect
+     * @return bool true when self::REGEXP matches somewhere in $term
+     */
+    public static function isAsianWords($term)
+    {
+        $pattern = '/' . self::REGEXP . '/u';
+        $found = preg_match($pattern, $term);
+
+        return (bool)$found;
+    }
+
+    /**
+     * Surround all Asian words in the given text with the given separator
+     *
+     * The separator is inserted literally. It is never interpreted as part of a
+     * replacement template, so separators containing digits, "$" or "\" are safe.
+     *
+     * @param string $text Original text containing asian words
+     * @param string $sep the separator to use
+     * @return string Text with separated asian words; the unchanged input if the regexp fails
+     */
+    public static function separateAsianWords($text, $sep = ' ')
+    {
+        // handle asian chars as single words (may fail on older PHP version, hence the @).
+        // A callback is used instead of a '\1' replacement template: a separator that starts
+        // with a digit would otherwise turn '\1' into a different (empty) back reference.
+        $asia = @preg_replace_callback(
+            '/(' . self::REGEXP . ')/u',
+            static function ($match) use ($sep) {
+                return $sep . $match[1] . $sep;
+            },
+            $text
+        );
+        if (!is_null($asia)) $text = $asia; // recover from regexp failure
+
+        return $text;
+    }
+
+    /**
+     * Cut a term into its Asian and non-Asian parts
+     *
+     * The term is split on runs matched by self::REGEXP. The delimiters are kept
+     * in the result and empty pieces are dropped.
+     *
+     * @param string $term
+     * @return string[]
+     */
+    public static function splitAsianWords($term)
+    {
+        $flags = PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY;
+        $pattern = '/(' . self::REGEXP . '+)/u';
+
+        return preg_split($pattern, $term, -1, $flags);
+    }
+}
--- a/ap23/web/doku/inc/Utf8/Clean.php
+++ b/ap23/web/doku/inc/Utf8/Clean.php
@@ -0,0 +1,204 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Methods to assess and clean UTF-8 strings
+ */
+class Clean
+{
+    /**
+     * Tell whether a string consists of 7bit ASCII bytes only
+     *
+     * @author Andreas Haerter <andreas.haerter@dev.mail-node.com>
+     *
+     * @param string $str
+     * @return bool false as soon as one byte outside 0x00-0x7F is found
+     */
+    public static function isASCII($str)
+    {
+        $hasHighByte = preg_match('/(?:[^\x00-\x7F])/', $str);
+
+        return $hasHighByte !== 1;
+    }
+
+    /**
+     * Tries to detect if a string is in Unicode encoding
+     *
+     * Walks the bytes and checks that every lead byte is followed by the number
+     * of continuation bytes (10bbbbbb) it announces. Lead bytes announcing up to
+     * five continuation bytes are accepted.
+     *
+     * @author <bmorel@ssi.fr>
+     * @link   http://php.net/manual/en/function.utf8-encode.php
+     *
+     * @param string $str
+     * @return bool
+     */
+    public static function isUtf8($str)
+    {
+        $total = strlen($str);
+        $pos = 0;
+
+        while ($pos < $total) {
+            $lead = ord($str[$pos]);
+
+            if ($lead < 0x80) {                    // 0bbbbbbb
+                $pos++;
+                continue;
+            }
+
+            if (($lead & 0xE0) === 0xC0) {         // 110bbbbb
+                $trail = 1;
+            } elseif (($lead & 0xF0) === 0xE0) {   // 1110bbbb
+                $trail = 2;
+            } elseif (($lead & 0xF8) === 0xF0) {   // 11110bbb
+                $trail = 3;
+            } elseif (($lead & 0xFC) === 0xF8) {   // 111110bb
+                $trail = 4;
+            } elseif (($lead & 0xFE) === 0xFC) {   // 1111110b
+                $trail = 5;
+            } else {
+                return false;                      // matches no lead byte model
+            }
+
+            // every announced continuation byte must exist and look like 10bbbbbb
+            while ($trail-- > 0) {
+                $pos++;
+                if ($pos === $total || (ord($str[$pos]) & 0xC0) !== 0x80) {
+                    return false;
+                }
+            }
+            $pos++;
+        }
+
+        return true;
+    }
+
+    /**
+     * Drops every byte with the high bit set
+     *
+     * What remains is a pure 7bit ASCII string
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @param string $str
+     * @return string
+     */
+    public static function strip($str)
+    {
+        $kept = '';
+        $pos = 0;
+        $end = strlen($str);
+
+        while ($pos < $end) {
+            $byte = $str[$pos++];
+            if (ord($byte) >= 128) continue;
+            $kept .= $byte;
+        }
+
+        return $kept;
+    }
+
+    /**
+     * Replaces special (nonalphanumeric) characters in a UTF-8 string
+     *
+     * Besides the characters returned by Table::specialChars(), the bytes
+     * 0x00 to 0x19 are replaced as well.
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @param  string $string The UTF8 string to strip of special chars
+     * @param  string $repl Replace special with this string
+     * @param  string $additional Additional chars to strip (inserted unquoted into the regexp char class)
+     * @return string
+     */
+    public static function stripspecials($string, $repl = '', $additional = '')
+    {
+        // the quoted special chars never change, so they are computed only once
+        static $quoted = null;
+        if (is_null($quoted)) {
+            $quoted = preg_quote(Table::specialChars(), '/');
+        }
+
+        $class = '[' . $additional . '\x00-\x19' . $quoted . ']';
+
+        return preg_replace('/' . $class . '/u', $repl, $string);
+    }
+
+    /**
+     * Replace bad bytes with an alternative character
+     *
+     * ASCII character is recommended for replacement char
+     *
+     * PCRE Pattern to locate bad bytes in a UTF-8 string
+     * Comes from W3 FAQ: Multilingual Forms
+     * Note: modified to include full ASCII range including control chars
+     *
+     * The string is consumed from left to right, one valid UTF-8 sequence or one
+     * invalid byte at a time. Valid sequences are copied, invalid bytes replaced.
+     *
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @see http://www.w3.org/International/questions/qa-forms-utf-8
+     *
+     * @param string $str to search
+     * @param string $replace to replace bad bytes with (defaults to '', i.e. bad bytes are removed) - use ASCII
+     * @return string
+     */
+    public static function replaceBadBytes($str, $replace = '')
+    {
+        $UTF8_BAD =
+            '([\x00-\x7F]' .                          # ASCII (including control chars)
+            '|[\xC2-\xDF][\x80-\xBF]' .               # non-overlong 2-byte
+            '|\xE0[\xA0-\xBF][\x80-\xBF]' .           # excluding overlongs
+            '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}' .    # straight 3-byte
+            '|\xED[\x80-\x9F][\x80-\xBF]' .           # excluding surrogates
+            '|\xF0[\x90-\xBF][\x80-\xBF]{2}' .        # planes 1-3
+            '|[\xF1-\xF3][\x80-\xBF]{3}' .            # planes 4-15
+            '|\xF4[\x80-\x8F][\x80-\xBF]{2}' .        # plane 16
+            '|(.{1}))';                               # invalid byte
+
+        $str = (string)$str;
+        $clean = '';
+        $offset = 0;
+        $len = strlen($str);
+
+        // Advance a byte offset instead of re-slicing the string on every match,
+        // and collect the result in a variable instead of the output buffer.
+        while ($offset < $len && preg_match('/' . $UTF8_BAD . '/S', $str, $matches, 0, $offset)) {
+            // group 2 only takes part in the match when the "invalid byte" branch was used
+            $clean .= isset($matches[2]) ? $replace : $matches[0];
+            $offset += strlen($matches[0]);
+        }
+
+        return $clean;
+    }
+
+
+    /**
+     * Swap accented UTF-8 characters for their unaccented ASCII-7 counterparts
+     *
+     * $case selects which letters are handled: a negative value only lower case
+     * letters, a positive value only upper case letters, 0 (the default) both.
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @param string $string
+     * @param int $case
+     * @return string
+     */
+    public static function deaccent($string, $case = 0)
+    {
+        $doLower = ($case <= 0);
+        $doUpper = ($case >= 0);
+
+        if ($doLower) {
+            $string = strtr($string, Table::lowerAccents());
+        }
+
+        if ($doUpper) {
+            $string = strtr($string, Table::upperAccents());
+        }
+
+        return $string;
+    }
+
+    /**
+     * Transliterate a non-latin string using the romanization table
+     *
+     * Pure ASCII input is returned untouched without loading the table.
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     *
+     * @param string $string
+     * @return string
+     */
+    public static function romanize($string)
+    {
+        return self::isASCII($string)
+            ? $string
+            : strtr($string, Table::romanization());
+    }
+
+    /**
+     * Move a byte index into a utf8 string onto a utf8 character boundary
+     *
+     * Indexes at or below 0 yield 0; indexes at or beyond the byte length yield the byte length.
+     *
+     * @author       chris smith <chris@jalakai.co.uk>
+     *
+     * @param string $str utf8 character string
+     * @param int $i byte index into $str
+     * @param bool $next direction to search for boundary, false = up (current character) true = down (next character)
+     * @return int byte index into $str now pointing to a utf8 character boundary
+     */
+    public static function correctIdx($str, $i, $next = false)
+    {
+        if ($i <= 0) {
+            return 0;
+        }
+
+        $size = strlen($str);
+        if ($i >= $size) {
+            return $size;
+        }
+
+        // continuation bytes look like 10bbbbbb; step over them in the requested direction
+        $step = $next ? 1 : -1;
+        while ($i > 0 && $i < $size && (ord($str[$i]) & 0xC0) === 0x80) {
+            $i += $step;
+        }
+
+        return $i;
+    }
+
+}
--- a/ap23/web/doku/inc/Utf8/Conversion.php
+++ b/ap23/web/doku/inc/Utf8/Conversion.php
@@ -0,0 +1,162 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Methods to convert from and to UTF-8 strings
+ */
+class Conversion
+{
+
+    /**
+     * Turns the characters of a UTF-8 string into numeric HTML entities
+     *
+     * Code points from 0x80 to 0xFF become decimal entities, higher ones hexadecimal
+     * entities. Code points below 0x80 are copied as-is unless $all is set.
+     *
+     * @author Tom N Harris <tnharris@whoopdedo.org>
+     * @author <vpribish at shopping dot com>
+     * @link   http://php.net/manual/en/function.utf8-decode.php
+     *
+     * @param string $str
+     * @param bool $all Encode the characters below 0x80 as entities as well
+     * @return string
+     */
+    public static function toHtml($str, $all = false)
+    {
+        $html = '';
+        $codepoints = Unicode::fromUtf8($str);
+
+        foreach ($codepoints as $codepoint) {
+            if ($codepoint >= 0x100) {
+                $html .= '&#x' . dechex($codepoint) . ';';
+                continue;
+            }
+
+            if ($codepoint >= 0x80 || $all) {
+                $html .= '&#' . $codepoint . ';';
+                continue;
+            }
+
+            $html .= chr($codepoint);
+        }
+
+        return $html;
+    }
+
+    /**
+     * Decodes HTML entities to UTF-8 characters
+     *
+     * By default only numeric entities (&#..; and &#x..;) are decoded. With
+     * $entities set, named entities such as &amp; or &lt; are decoded in the
+     * same pass. Decoding both kinds in one pass avoids the problem that would
+     * occur if you had to decode "&amp;#38;&#38;amp;#38;" in two steps:
+     *
+     * unhtmlspecialchars(\dokuwiki\Utf8\Conversion::fromHtml($s)) -> "&#38;&#38;"
+     * \dokuwiki\Utf8\Conversion::fromHtml(unhtmlspecialchars($s)) -> "&&amp#38;"
+     * what it should be                   -> "&#38;&amp#38;"
+     *
+     * @author Tom N Harris <tnharris@whoopdedo.org>
+     *
+     * @param  string $str UTF-8 encoded string
+     * @param  boolean $entities decode named entities in addition to numeric ones
+     * @return string  UTF-8 encoded string with numeric (and named) entities replaced.
+     */
+    public static function fromHtml($str, $entities = false)
+    {
+        if ($entities) {
+            $pattern = '/&(#)?([Xx])?([0-9A-Za-z]+);/m';
+            $decoder = 'decodeAnyEntity';
+        } else {
+            $pattern = '/(&#([Xx])?([0-9A-Za-z]+);)/m';
+            $decoder = 'decodeNumericEntity';
+        }
+
+        return preg_replace_callback($pattern, [__CLASS__, $decoder], $str);
+    }
+
+    /**
+     * Decodes any HTML entity to its correct UTF-8 char equivalent
+     *
+     * Numeric entities are handed to decodeNumericEntity(). Named entities are
+     * looked up in PHP's HTML entity table. Unknown names are returned unchanged.
+     *
+     * @param string[] $ent Match of the entity pattern: [0] whole entity, [1] '#' for numeric ones,
+     *                      [2] 'x'/'X' for hexadecimal ones, [3] the digits or the name
+     * @return string
+     */
+    protected static function decodeAnyEntity($ent)
+    {
+        // create the named entity lookup table (entity => UTF-8 character)
+        static $table = null;
+        if ($table === null) {
+            // The table is requested in UTF-8 so that its characters can be returned unchanged.
+            // Taking ord() of a multi-byte character would only see its first byte.
+            $table = array_flip(
+                get_html_translation_table(HTML_ENTITIES, ENT_QUOTES | ENT_HTML401, 'UTF-8')
+            );
+        }
+
+        if ($ent[1] === '#') {
+            return self::decodeNumericEntity($ent);
+        }
+
+        if (array_key_exists($ent[0], $table)) {
+            return $table[$ent[0]];
+        }
+
+        return $ent[0];
+    }
+
+    /**
+     * Turns a numeric HTML entity into its UTF-8 character
+     *
+     * @param $ent string[] Match of the entity pattern: [2] is 'x' or 'X' for hexadecimal
+     *                      entities, [3] holds the digits
+     * @return string|false
+     */
+    protected static function decodeNumericEntity($ent)
+    {
+        $isHex = in_array($ent[2], array('X', 'x'));
+        $cp = $isHex ? hexdec($ent[3]) : intval($ent[3]);
+
+        return Unicode::toUtf8(array($cp));
+    }
+
+    /**
+     * Converts a UTF-8 string to UTF-16BE
+     *
+     * With mbstring the conversion is left to mb_convert_encoding(). Without it
+     * every code point is packed into a single 16 bit big endian unit, so the
+     * result may really be UCS-2.
+     *
+     * @param string $str
+     * @param bool $bom prepend the byte order mark FE FF
+     * @return string
+     */
+    public static function toUtf16be($str, $bom = false)
+    {
+        $prefix = $bom ? "\xFE\xFF" : '';
+
+        if (UTF8_MBSTRING) {
+            return $prefix . mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');
+        }
+
+        $units = '';
+        foreach (Unicode::fromUtf8($str) as $codepoint) {
+            $units .= pack('n', $codepoint);
+        }
+
+        return $prefix . $units;
+    }
+
+    /**
+     * UTF-16BE to UTF-8 conversion.
+     *
+     * The input is read as a sequence of 16 bit big endian units and each unit is
+     * handed to Unicode::toUtf8() as one code point, so this may really be UCS-2.
+     *
+     * @param string $str UTF-16BE encoded string
+     * @return false|string
+     */
+    public static function fromUtf16be($str)
+    {
+        $units = unpack('n*', $str);
+
+        return Unicode::toUtf8($units);
+    }
+
+}
--- a/ap23/web/doku/inc/Utf8/PhpString.php
+++ b/ap23/web/doku/inc/Utf8/PhpString.php
@@ -0,0 +1,383 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * UTF-8 aware equivalents to PHP's string functions
+ */
+class PhpString
+{
+
+    /**
+     * Locale independent stand-in for PHP's basename()
+     *
+     * Needed because of a bug in PHP's own basename() implementation. Both "/"
+     * and "\" are treated as path separators.
+     *
+     * @param string $path A path
+     * @param string $suffix If the name component ends in suffix this will also be cut off
+     * @return string
+     * @link   https://bugs.php.net/bug.php?id=37738
+     *
+     * @see basename()
+     */
+    public static function basename($path, $suffix = '')
+    {
+        $path = trim($path, '\\/');
+
+        // position of the last separator of either kind (false when there is none)
+        $lastSep = max(strrpos($path, '/'), strrpos($path, '\\'));
+        if ($lastSep) {
+            $path = substr($path, $lastSep + 1);
+        }
+
+        $cut = strlen($suffix);
+        if (!$cut || substr($path, -$cut) !== $suffix) {
+            return $path;
+        }
+
+        return substr($path, 0, -$cut);
+    }
+
+    /**
+     * Unicode aware replacement for strlen()
+     *
+     * Counts characters with mbstring or iconv when available. utf8_decode()
+     * (which turns every character outside ISO-8859-1 into a single '?', which
+     * is fine for counting) is only a fallback because it is deprecated in
+     * current PHP versions. Without any of them the byte length is returned.
+     *
+     * @param string $string
+     * @return int
+     * @see    utf8_decode()
+     *
+     * @author <chernyshevsky at hotmail dot com>
+     * @see    strlen()
+     */
+    public static function strlen($string)
+    {
+        if (UTF8_MBSTRING) {
+            return mb_strlen($string, 'UTF-8');
+        }
+
+        if (function_exists('iconv_strlen')) {
+            return iconv_strlen($string, 'UTF-8');
+        }
+
+        // utf8_decode() is deprecated, only use it when nothing better is available
+        if (function_exists('utf8_decode')) {
+            return strlen(utf8_decode($string));
+        }
+
+        return strlen($string);
+    }
+
+    /**
+     * UTF-8 aware alternative to substr
+     *
+     * Return part of a string given character offset (and optionally length).
+     * Uses mb_substr() with an explicit UTF-8 encoding when mbstring is available,
+     * otherwise a regular expression built from offset and length.
+     *
+     * @param string $str
+     * @param int $offset number of UTF-8 characters offset (from left)
+     * @param int $length (optional) length in UTF-8 characters from offset
+     * @return string the requested part; an empty string when nothing matches
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @author Chris Smith <chris@jalakai.co.uk>
+     *
+     */
+    public static function substr($str, $offset, $length = null)
+    {
+        if (UTF8_MBSTRING) {
+            // name the encoding explicitly instead of relying on the global mbstring
+            // setting, like the other mb_* calls in this class do
+            return mb_substr($str, $offset, $length, 'UTF-8');
+        }
+
+        /*
+         * Notes:
+         *
+         * no mb string support, so we'll use pcre regex's with 'u' flag
+         * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
+         * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
+         *
+         * substr documentation states false can be returned in some cases (e.g. offset > string length)
+         * mb_substr never returns false, it will return an empty string instead.
+         *
+         * calculating the number of characters in the string is a relatively expensive operation, so
+         * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
+         */
+
+        // cast parameters to appropriate types to avoid multiple notices/warnings
+        $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
+        $offset = (int)$offset;
+        if ($length !== null) $length = (int)$length;
+
+        // handle trivial cases
+        if ($length === 0) return '';
+        if ($offset < 0 && $length < 0 && $length < $offset) return '';
+
+        $offset_pattern = '';
+        $length_pattern = '';
+
+        // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
+        if ($offset < 0) {
+            $strlen = self::strlen($str);        // see notes
+            $offset = $strlen + $offset;
+            if ($offset < 0) $offset = 0;
+        }
+
+        // establish a pattern for offset, a non-captured group equal in length to offset
+        if ($offset > 0) {
+            $Ox = (int)($offset / 65535);
+            $Oy = $offset % 65535;
+
+            if ($Ox) $offset_pattern = '(?:.{65535}){' . $Ox . '}';
+            $offset_pattern = '^(?:' . $offset_pattern . '.{' . $Oy . '})';
+        } else {
+            $offset_pattern = '^';                      // offset == 0; just anchor the pattern
+        }
+
+        // establish a pattern for length
+        if ($length === null) {
+            $length_pattern = '(.*)$';                  // the rest of the string
+        } else {
+
+            if (!isset($strlen)) $strlen = self::strlen($str);    // see notes
+            if ($offset > $strlen) return '';           // another trivial case
+
+            if ($length > 0) {
+
+                // reduce any length that would go past the end of the string
+                $length = min($strlen - $offset, $length);
+
+                $Lx = (int)($length / 65535);
+                $Ly = $length % 65535;
+
+                // +ve length requires ... a captured group of length characters
+                if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
+                $length_pattern = '(' . $length_pattern . '.{' . $Ly . '})';
+
+            } else if ($length < 0) {
+
+                if ($length < ($offset - $strlen)) return '';
+
+                $Lx = (int)((-$length) / 65535);
+                $Ly = (-$length) % 65535;
+
+                // -ve length requires ... capture everything except a group of -length characters
+                //                         anchored at the tail-end of the string
+                if ($Lx) $length_pattern = '(?:.{65535}){' . $Lx . '}';
+                $length_pattern = '(.*)(?:' . $length_pattern . '.{' . $Ly . '})$';
+            }
+        }
+
+        if (!preg_match('#' . $offset_pattern . $length_pattern . '#us', $str, $match)) return '';
+        return $match[1];
+    }
+
+    // phpcs:disable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
+    /**
+     * Unicode aware replacement for substr_replace()
+     *
+     * @param string $string input string
+     * @param string $replacement the replacement
+     * @param int $start the replacing will begin at the start'th character offset into string.
+     *                            If negative, it is counted from the end of string.
+     * @param int $length If given and is positive, it represents the length of the portion of string which is
+     *                            to be replaced. If negative, it is the number of characters from the end of
+     *                            string at which to stop replacing. If length is zero (the default) then this
+     *                            function will have the effect of inserting replacement into string at the
+     *                            given start offset.
+     * @return string
+     * @see    substr_replace()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function substr_replace($string, $replacement, $start, $length = 0)
+    {
+        // translate positions counted from the end into absolute character positions,
+        // otherwise the part of the string before a negative start would be lost
+        if ($start < 0 || $length < 0) {
+            $total = self::strlen($string);
+            if ($start < 0) $start = max(0, $total + $start);
+            if ($length < 0) $length = max(0, $total + $length - $start);
+        }
+
+        $ret = '';
+        if ($start > 0) $ret .= self::substr($string, 0, $start);
+        $ret .= $replacement;
+        $ret .= self::substr($string, $start + $length);
+        return $ret;
+    }
+    // phpcs:enable PSR1.Methods.CamelCapsMethodName.NotCamelCaps
+
+    /**
+     * Unicode aware replacement for ltrim()
+     *
+     * Without a charlist PHP's own ltrim() is used. Otherwise every character of
+     * the charlist is taken literally and stripped from the start of the string.
+     *
+     * @param string $str
+     * @param string $charlist characters to strip
+     * @return string
+     * @see    ltrim()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function ltrim($str, $charlist = '')
+    {
+        if ($charlist === '') return ltrim($str);
+
+        // quote charlist for use in a character class; this also covers "^", which
+        // would otherwise negate the class (or break the pattern when used alone)
+        $charlist = preg_quote($charlist, '/');
+
+        return preg_replace('/^[' . $charlist . ']+/u', '', $str);
+    }
+
+    /**
+     * Unicode aware replacement for rtrim()
+     *
+     * Without a charlist PHP's own rtrim() is used. Otherwise every character of
+     * the charlist is taken literally and stripped from the end of the string.
+     *
+     * @param string $str
+     * @param string $charlist characters to strip
+     * @return string
+     * @see    rtrim()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function rtrim($str, $charlist = '')
+    {
+        if ($charlist === '') return rtrim($str);
+
+        // quote charlist for use in a character class; this also covers "^", which
+        // would otherwise negate the class (or break the pattern when used alone)
+        $charlist = preg_quote($charlist, '/');
+
+        return preg_replace('/[' . $charlist . ']+$/u', '', $str);
+    }
+
+    /**
+     * Unicode aware replacement for trim()
+     *
+     * Falls back to PHP's own trim() when no charlist is given, otherwise the
+     * string is trimmed on the right first and on the left afterwards.
+     *
+     * @param string $str
+     * @param string $charlist
+     * @return string
+     * @see    trim()
+     *
+     * @author Andreas Gohr <andi@splitbrain.org>
+     */
+    public static function trim($str, $charlist = '')
+    {
+        if ($charlist !== '') {
+            $rightTrimmed = self::rtrim($str, $charlist);
+            return self::ltrim($rightTrimmed, $charlist);
+        }
+
+        return trim($str);
+    }
+
+    /**
+     * Unicode aware replacement for strtolower()
+     *
+     * With mbstring the string is lowercased by mb_strtolower() and, if the
+     * Normalizer class is already loaded, normalized with its default settings.
+     * Without mbstring the case conversion table is used.
+     *
+     * @param string $string
+     * @return string
+     * @see    \dokuwiki\Utf8\PhpString::strtoupper()
+     *
+     * @author Leo Feyer <leo@typolight.org>
+     * @see    strtolower()
+     */
+    public static function strtolower($string)
+    {
+        if (!UTF8_MBSTRING) {
+            return strtr($string, Table::upperCaseToLowerCase());
+        }
+
+        $normalizerLoaded = class_exists('Normalizer', false);
+        $lowered = mb_strtolower($string, 'utf-8');
+
+        return $normalizerLoaded ? \Normalizer::normalize($lowered) : $lowered;
+    }
+
+    /**
+     * Unicode aware replacement for strtoupper()
+     *
+     * Uses mb_strtoupper() when mbstring is available, the case conversion
+     * table otherwise.
+     *
+     * @param string $string
+     * @return string
+     * @see    \dokuwiki\Utf8\PhpString::strtolower()
+     *
+     * @author Leo Feyer <leo@typolight.org>
+     * @see    strtoupper()
+     */
+    public static function strtoupper($string)
+    {
+        if (!UTF8_MBSTRING) {
+            return strtr($string, Table::lowerCaseToUpperCase());
+        }
+
+        return mb_strtoupper($string, 'utf-8');
+    }
+
+
+    /**
+     * UTF-8 aware alternative to ucfirst
+     * Make a string's first character uppercase
+     *
+     * @param string $str
+     * @return string with first character as upper case (if applicable); the unchanged
+     *                input when its first character cannot be split off
+     * @author Harry Fuecks
+     *
+     */
+    public static function ucfirst($str)
+    {
+        switch (self::strlen($str)) {
+            case 0:
+                return '';
+            case 1:
+                return self::strtoupper($str);
+            default:
+                // the 'u' flag makes the match fail on input that is not valid UTF-8;
+                // in that case there is nothing sensible to uppercase
+                if (!preg_match('/^(.{1})(.*)$/us', $str, $matches)) {
+                    return $str;
+                }
+                return self::strtoupper($matches[1]) . $matches[2];
+        }
+    }
+
+    /**
+     * UTF-8 aware alternative to ucwords
+     * Uppercase the first character of each word in a string
+     *
+     * @param string $str
+     * @return string with first char of each word uppercase
+     * @author Harry Fuecks
+     * @see http://php.net/ucwords
+     *
+     */
+    public static function ucwords($str)
+    {
+        // Note: [\x0c\x09\x0b\x0a\x0d\x20] matches;
+        // form feeds, horizontal tabs, vertical tabs, linefeeds, carriage returns and spaces
+        // This corresponds to the definition of a "word" defined at http://php.net/ucwords
+        $pattern = '/(^|([\x0c\x09\x0b\x0a\x0d\x20]+))([^\x0c\x09\x0b\x0a\x0d\x20]{1})[^\x0c\x09\x0b\x0a\x0d\x20]*/u';
+
+        return preg_replace_callback(
+            $pattern,
+            static function ($matches) {
+                $leadingws = $matches[2];
+                $ucfirst = self::strtoupper($matches[3]);
+                // Cut off exactly the whitespace the pattern matched. ltrim() strips a different
+                // set of characters (it keeps form feeds and removes NUL bytes), which made the
+                // replacement below hit the wrong character.
+                $word = substr($matches[0], strlen($matches[1]));
+                $ucword = self::substr_replace($word, $ucfirst, 0, 1);
+                return $leadingws . $ucword;
+            },
+            $str
+        );
+    }
+
+    /**
+     * This is an Unicode aware replacement for strpos
+     *
+     * The search itself is done by the byte based strpos(). The byte position it
+     * reports is converted to a character position, and the search is repeated
+     * further to the right for as long as the match lies before the requested
+     * character offset.
+     *
+     * @param string $haystack
+     * @param string $needle
+     * @param integer $offset character offset at which the search starts
+     * @return integer|false character position of the match, false if the needle was not found
+     * @author Leo Feyer <leo@typolight.org>
+     * @see    strpos()
+     *
+     */
+    public static function strpos($haystack, $needle, $offset = 0)
+    {
+        // number of bytes added to $offset to get the byte position to search from
+        $comp = 0;
+        // character position of the last match, null until the first search was done
+        $length = null;
+
+        while ($length === null || $length < $offset) {
+            // byte based search
+            $pos = strpos($haystack, $needle, $offset + $comp);
+
+            if ($pos === false)
+                return false;
+
+            // number of characters in front of the match = its character position
+            $length = self::strlen(substr($haystack, 0, $pos));
+
+            // match lies before the requested character offset: remember how many more
+            // bytes than characters precede it and search again from further right
+            if ($length < $offset)
+                $comp = $pos - $length;
+        }
+
+        return $length;
+    }
+
+
+}
--- a/ap23/web/doku/inc/Utf8/Table.php
+++ b/ap23/web/doku/inc/Utf8/Table.php
@@ -0,0 +1,93 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Provides static access to the UTF-8 conversion tables
+ *
+ * Lazy-Loads tables on first access
+ */
+class Table
+{
+
+    /**
+     * Provide the table mapping upper case to lower case characters
+     *
+     * The table file is included on the first call only.
+     *
+     * @return array
+     */
+    public static function upperCaseToLowerCase()
+    {
+        static $table = null;
+
+        if ($table !== null) {
+            return $table;
+        }
+
+        $table = include __DIR__ . '/tables/case.php';
+        return $table;
+    }
+
+    /**
+     * Provide the table mapping lower case to upper case characters
+     *
+     * It is the flipped upper to lower case table, built on the first call only.
+     *
+     * @return array
+     */
+    public static function lowerCaseToUpperCase()
+    {
+        static $table = null;
+
+        if (is_null($table)) {
+            $table = array_flip(self::upperCaseToLowerCase());
+        }
+
+        return $table;
+    }
+
+    /**
+     * Provide the lower case accent table
+     *
+     * The table file is included on the first call only.
+     *
+     * @return array
+     */
+    public static function lowerAccents()
+    {
+        static $table = null;
+
+        if ($table !== null) {
+            return $table;
+        }
+
+        $table = include __DIR__ . '/tables/loweraccents.php';
+        return $table;
+    }
+
+    /**
+     * Provide the upper case accent table
+     *
+     * The table file is included on the first call only.
+     *
+     * @return array
+     */
+    public static function upperAccents()
+    {
+        static $table = null;
+
+        if ($table !== null) {
+            return $table;
+        }
+
+        $table = include __DIR__ . '/tables/upperaccents.php';
+        return $table;
+    }
+
+    /**
+     * Provide the romanization table
+     *
+     * The table file is included on the first call only.
+     *
+     * @return array
+     */
+    public static function romanization()
+    {
+        static $table = null;
+
+        if ($table !== null) {
+            return $table;
+        }
+
+        $table = include __DIR__ . '/tables/romanization.php';
+        return $table;
+    }
+
+    /**
+     * Provide the special chars as one concatenated string
+     *
+     * The code point table is included and converted by Unicode::toUtf8()
+     * on the first call only.
+     *
+     * @return string
+     */
+    public static function specialChars()
+    {
+        static $string = null;
+
+        if ($string !== null) {
+            return $string;
+        }
+
+        // FIXME should we cache this to file system?
+        $codepoints = include __DIR__ . '/tables/specials.php';
+        $string = Unicode::toUtf8($codepoints);
+
+        return $string;
+    }
+}
--- a/ap23/web/doku/inc/Utf8/Unicode.php
+++ b/ap23/web/doku/inc/Utf8/Unicode.php
@@ -0,0 +1,277 @@
+<?php
+
+namespace dokuwiki\Utf8;
+
+/**
+ * Convert between UTF-8 and a list of Unicode Code Points
+ */
+class Unicode
+{
+
+    /**
+     * Takes an UTF-8 string and returns an array of ints representing the
+     * Unicode characters. Astral planes are supported ie. the ints in the
+     * output can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
+     * are not allowed.
+     *
+     * If $strict is set to true the function returns false if the input
+     * string isn't a valid UTF-8 octet sequence and raises a PHP error at
+     * level E_USER_WARNING. This includes input that ends in the middle of
+     * a multi-octet sequence.
+     *
+     * If $strict is false, invalid lead octets and unexpected octets inside
+     * a sequence are skipped silently, code points that fail the validity
+     * checks are still appended to the result, and an unfinished sequence at
+     * the end of the input is dropped.
+     *
+     * Note: this function has been modified slightly in this library to
+     * trigger errors on encountering bad bytes
+     *
+     * @author <hsivonen@iki.fi>
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @see    unicode_to_utf8
+     * @link   http://hsivonen.iki.fi/php-utf8/
+     * @link   http://sourceforge.net/projects/phputf8/
+     * @todo break into less complex chunks
+     * @todo use exceptions instead of user errors
+     *
+     * @param  string $str UTF-8 encoded string
+     * @param  boolean $strict Check for invalid sequences?
+     * @return mixed array of unicode code points or false if UTF-8 invalid
+     */
+    public static function fromUtf8($str, $strict = false)
+    {
+        $mState = 0;     // number of continuation octets still expected for the current sequence
+        $mUcs4 = 0;      // code point assembled so far
+        $mBytes = 1;     // total number of octets in the current sequence
+
+        $out = [];
+
+        $len = strlen($str);
+
+        for ($i = 0; $i < $len; $i++) {
+
+            $in = ord($str[$i]);
+
+            if ($mState === 0) {
+
+                // When mState is zero we expect either a US-ASCII character or a
+                // multi-octet sequence.
+                if (0 === (0x80 & $in)) {
+                    // US-ASCII, pass straight through.
+                    $out[] = $in;
+                    $mBytes = 1;
+
+                } elseif (0xC0 === (0xE0 & $in)) {
+                    // First octet of 2 octet sequence
+                    $mUcs4 = ($in & 0x1F) << 6;
+                    $mState = 1;
+                    $mBytes = 2;
+
+                } elseif (0xE0 === (0xF0 & $in)) {
+                    // First octet of 3 octet sequence
+                    $mUcs4 = ($in & 0x0F) << 12;
+                    $mState = 2;
+                    $mBytes = 3;
+
+                } elseif (0xF0 === (0xF8 & $in)) {
+                    // First octet of 4 octet sequence
+                    $mUcs4 = ($in & 0x07) << 18;
+                    $mState = 3;
+                    $mBytes = 4;
+
+                } elseif (0xF8 === (0xFC & $in)) {
+                    /* First octet of 5 octet sequence.
+                     *
+                     * This is illegal because the encoded codepoint must be either
+                     * (a) not the shortest form or
+                     * (b) outside the Unicode range of 0-0x10FFFF.
+                     * Rather than trying to resynchronize, we will carry on until the end
+                     * of the sequence and let the later error handling code catch it.
+                     */
+                    $mUcs4 = ($in & 0x03) << 24;
+                    $mState = 4;
+                    $mBytes = 5;
+
+                } elseif (0xFC === (0xFE & $in)) {
+                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
+                    $mUcs4 = ($in & 1) << 30;
+                    $mState = 5;
+                    $mBytes = 6;
+
+                } elseif ($strict) {
+                    /* Current octet is neither in the US-ASCII range nor a legal first
+                     * octet of a multi-octet sequence.
+                     */
+                    trigger_error(
+                        'utf8_to_unicode: Illegal sequence identifier ' .
+                        'in UTF-8 at byte ' . $i,
+                        E_USER_WARNING
+                    );
+                    return false;
+
+                }
+
+            } else {
+
+                // When mState is non-zero, we expect a continuation of the multi-octet
+                // sequence
+                if (0x80 === (0xC0 & $in)) {
+
+                    // Legal continuation: merge its 6 payload bits at the right position.
+                    $shift = ($mState - 1) * 6;
+                    $mUcs4 |= ($in & 0x0000003F) << $shift;
+
+                    /**
+                     * End of the multi-octet sequence. mUcs4 now contains the final
+                     * Unicode codepoint to be output
+                     */
+                    if (0 === --$mState) {
+
+                        /*
+                         * Check for illegal sequences and codepoints.
+                         */
+                        // From Unicode 3.1, non-shortest form is illegal
+                        if (((2 === $mBytes) && ($mUcs4 < 0x0080)) ||
+                            ((3 === $mBytes) && ($mUcs4 < 0x0800)) ||
+                            ((4 === $mBytes) && ($mUcs4 < 0x10000)) ||
+                            (4 < $mBytes) ||
+                            // From Unicode 3.2, surrogate characters are illegal
+                            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
+                            // Codepoints outside the Unicode range are illegal
+                            ($mUcs4 > 0x10FFFF)) {
+
+                            if ($strict) {
+                                trigger_error(
+                                    'utf8_to_unicode: Illegal sequence or codepoint ' .
+                                    'in UTF-8 at byte ' . $i,
+                                    E_USER_WARNING
+                                );
+
+                                return false;
+                            }
+
+                        }
+
+                        if (0xFEFF !== $mUcs4) {
+                            // BOM is legal but we don't want to output it
+                            $out[] = $mUcs4;
+                        }
+
+                        //initialize UTF8 cache
+                        $mState = 0;
+                        $mUcs4 = 0;
+                        $mBytes = 1;
+                    }
+
+                } elseif ($strict) {
+                    /**
+                     *((0xC0 & (*in) != 0x80) && (mState != 0))
+                     * Incomplete multi-octet sequence.
+                     */
+                    trigger_error(
+                        'utf8_to_unicode: Incomplete multi-octet ' .
+                        '   sequence in UTF-8 at byte ' . $i,
+                        E_USER_WARNING
+                    );
+
+                    return false;
+                }
+            }
+        }
+
+        // The input ended while continuation octets were still expected. In strict
+        // mode a truncated sequence is invalid UTF-8 and must not pass silently.
+        if ($strict && $mState !== 0) {
+            trigger_error(
+                'utf8_to_unicode: Incomplete multi-octet ' .
+                'sequence in UTF-8 at end of input',
+                E_USER_WARNING
+            );
+
+            return false;
+        }
+
+        return $out;
+    }
+
+    /**
+     * Takes an array of ints representing the Unicode characters and returns
+     * a UTF-8 string. Astral planes are supported ie. the ints in the
+     * input can be > 0xFFFF. Occurrences of the BOM are ignored. Surrogates
+     * are not allowed.
+     *
+     * If $strict is set to true the function returns false if the input
+     * array contains ints that represent surrogates or are outside the
+     * Unicode range and raises a PHP error at level E_USER_WARNING. If
+     * $strict is false such values are skipped.
+     *
+     * Note: the result is assembled by plain string concatenation. No output
+     * buffer is opened, so returning early in strict mode cannot leave a
+     * buffer dangling.
+     *
+     * @param  array $arr of unicode code points representing a string
+     * @param  boolean $strict Check for invalid sequences?
+     * @return string|false UTF-8 string or false if array contains invalid code points;
+     *                      an empty string if $arr is not an array
+     *
+     * @author <hsivonen@iki.fi>
+     * @author Harry Fuecks <hfuecks@gmail.com>
+     * @see    utf8_to_unicode
+     * @link   http://hsivonen.iki.fi/php-utf8/
+     * @link   http://sourceforge.net/projects/phputf8/
+     * @todo use exceptions instead of user errors
+     */
+    public static function toUtf8($arr, $strict = false)
+    {
+        if (!is_array($arr)) return '';
+
+        $utf8 = '';
+
+        foreach ($arr as $k => $cp) {
+
+            if ($cp < 0 || $cp > 0x10ffff) {
+                // Out of the Unicode range. Negative values are rejected here as well;
+                // otherwise they would fall into the 2 byte branch and emit garbage.
+                if ($strict) {
+                    trigger_error(
+                        'unicode_to_utf8: Codepoint out of Unicode range ' .
+                        'at index: ' . $k . ', value: ' . $cp,
+                        E_USER_WARNING
+                    );
+
+                    return false;
+                }
+                continue;
+            }
+
+            if ($cp <= 0x007f) {
+                // ASCII range (including control chars)
+                $utf8 .= chr($cp);
+
+            } elseif ($cp <= 0x07ff) {
+                // 2 byte sequence
+                $utf8 .= chr(0xc0 | ($cp >> 6));
+                $utf8 .= chr(0x80 | ($cp & 0x003f));
+
+            } elseif ($cp == 0xFEFF) {
+                // Byte order mark: nop -- zap the BOM
+
+            } elseif ($cp >= 0xD800 && $cp <= 0xDFFF) {
+                // found a surrogate, which is illegal in UTF-8
+                if ($strict) {
+                    trigger_error(
+                        'unicode_to_utf8: Illegal surrogate ' .
+                        'at index: ' . $k . ', value: ' . $cp,
+                        E_USER_WARNING
+                    );
+
+                    return false;
+                }
+
+            } elseif ($cp <= 0xffff) {
+                // 3 byte sequence
+                $utf8 .= chr(0xe0 | ($cp >> 12));
+                $utf8 .= chr(0x80 | (($cp >> 6) & 0x003f));
+                $utf8 .= chr(0x80 | ($cp & 0x003f));
+
+            } else {
+                // 4 byte sequence (range was checked at the top of the loop)
+                $utf8 .= chr(0xf0 | ($cp >> 18));
+                $utf8 .= chr(0x80 | (($cp >> 12) & 0x3f));
+                $utf8 .= chr(0x80 | (($cp >> 6) & 0x3f));
+                $utf8 .= chr(0x80 | ($cp & 0x3f));
+            }
+        }
+
+        return $utf8;
+    }
+}
--- a/ap23/web/doku/inc/Utf8/tables/case.php
+++ b/ap23/web/doku/inc/Utf8/tables/case.php
@@ -0,0 +1,659 @@
+<?php
+/**
+ * UTF-8 Case lookup table
+ *
+ * This lookup table maps each upper case letter (key) to its corresponding
+ * lower case letter (value) in UTF-8
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ */
+return [
+    'A' => 'a',
+    'B' => 'b',
+    'C' => 'c',
+    'D' => 'd',
+    'E' => 'e',
+    'F' => 'f',
+    'G' => 'g',
+    'H' => 'h',
+    'I' => 'i',
+    'J' => 'j',
+    'K' => 'k',
+    'L' => 'l',
+    'M' => 'm',
+    'N' => 'n',
+    'O' => 'o',
+    'P' => 'p',
+    'Q' => 'q',
+    'R' => 'r',
+    'S' => 's',
+    'T' => 't',
+    'U' => 'u',
+    'V' => 'v',
+    'W' => 'w',
+    'X' => 'x',
+    'Y' => 'y',
+    'Z' => 'z',
+    'À' => 'à',
+    'Á' => 'á',
+    'Â' => 'â',
+    'Ã' => 'ã',
+    'Ä' => 'ä',
+    'Å' => 'å',
+    'Æ' => 'æ',
+    'Ç' => 'ç',
+    'È' => 'è',
+    'É' => 'é',
+    'Ê' => 'ê',
+    'Ë' => 'ë',
+    'Ì' => 'ì',
+    'Í' => 'í',
+    'Î' => 'î',
+    'Ï' => 'ï',
+    'Ð' => 'ð',
+    'Ñ' => 'ñ',
+    'Ò' => 'ò',
+    'Ó' => 'ó',
+    'Ô' => 'ô',
+    'Õ' => 'õ',
+    'Ö' => 'ö',
+    'Ø' => 'ø',
+    'Ù' => 'ù',
+    'Ú' => 'ú',
+    'Û' => 'û',
+    'Ü' => 'ü',
+    'Ý' => 'ý',
+    'Þ' => 'þ',
+    'Ā' => 'ā',
+    'Ă' => 'ă',
+    'Ą' => 'ą',
+    'Ć' => 'ć',
+    'Ĉ' => 'ĉ',
+    'Ċ' => 'ċ',
+    'Č' => 'č',
+    'Ď' => 'ď',
+    'Đ' => 'đ',
+    'Ē' => 'ē',
+    'Ĕ' => 'ĕ',
+    'Ė' => 'ė',
+    'Ę' => 'ę',
+    'Ě' => 'ě',
+    'Ĝ' => 'ĝ',
+    'Ğ' => 'ğ',
+    'Ġ' => 'ġ',
+    'Ģ' => 'ģ',
+    'Ĥ' => 'ĥ',
+    'Ħ' => 'ħ',
+    'Ĩ' => 'ĩ',
+    'Ī' => 'ī',
+    'Ĭ' => 'ĭ',
+    'Į' => 'į',
+    'Ĳ' => 'ĳ',
+    'Ĵ' => 'ĵ',
+    'Ķ' => 'ķ',
+    'Ĺ' => 'ĺ',
+    'Ļ' => 'ļ',
+    'Ľ' => 'ľ',
+    'Ŀ' => 'ŀ',
+    'Ł' => 'ł',
+    'Ń' => 'ń',
+    'Ņ' => 'ņ',
+    'Ň' => 'ň',
+    'Ŋ' => 'ŋ',
+    'Ō' => 'ō',
+    'Ŏ' => 'ŏ',
+    'Ő' => 'ő',
+    'Œ' => 'œ',
+    'Ŕ' => 'ŕ',
+    'Ŗ' => 'ŗ',
+    'Ř' => 'ř',
+    'Ś' => 'ś',
+    'Ŝ' => 'ŝ',
+    'Ş' => 'ş',
+    'Š' => 'š',
+    'Ţ' => 'ţ',
+    'Ť' => 'ť',
+    'Ŧ' => 'ŧ',
+    'Ũ' => 'ũ',
+    'Ū' => 'ū',
+    'Ŭ' => 'ŭ',
+    'Ů' => 'ů',
+    'Ű' => 'ű',
+    'Ų' => 'ų',
+    'Ŵ' => 'ŵ',
+    'Ŷ' => 'ŷ',
+    'Ÿ' => 'ÿ',
+    'Ź' => 'ź',
+    'Ż' => 'ż',
+    'Ž' => 'ž',
+    'Ɓ' => 'ɓ',
+    'Ƃ' => 'ƃ',
+    'Ƅ' => 'ƅ',
+    'Ɔ' => 'ɔ',
+    'Ƈ' => 'ƈ',
+    'Ɖ' => 'ɖ',
+    'Ɗ' => 'ɗ',
+    'Ƌ' => 'ƌ',
+    'Ǝ' => 'ǝ',
+    'Ə' => 'ə',
+    'Ɛ' => 'ɛ',
+    'Ƒ' => 'ƒ',
+    'Ɣ' => 'ɣ',
+    'Ɩ' => 'ɩ',
+    'Ɨ' => 'ɨ',
+    'Ƙ' => 'ƙ',
+    'Ɯ' => 'ɯ',
+    'Ɲ' => 'ɲ',
+    'Ɵ' => 'ɵ',
+    'Ơ' => 'ơ',
+    'Ƣ' => 'ƣ',
+    'Ƥ' => 'ƥ',
+    'Ʀ' => 'ʀ',
+    'Ƨ' => 'ƨ',
+    'Ʃ' => 'ʃ',
+    'Ƭ' => 'ƭ',
+    'Ʈ' => 'ʈ',
+    'Ư' => 'ư',
+    'Ʊ' => 'ʊ',
+    'Ʋ' => 'ʋ',
+    'Ƴ' => 'ƴ',
+    'Ƶ' => 'ƶ',
+    'Ʒ' => 'ʒ',
+    'Ƹ' => 'ƹ',
+    'Ƽ' => 'ƽ',
+    'ǅ' => 'ǆ',
+    'ǈ' => 'ǉ',
+    'ǋ' => 'ǌ',
+    'Ǎ' => 'ǎ',
+    'Ǐ' => 'ǐ',
+    'Ǒ' => 'ǒ',
+    'Ǔ' => 'ǔ',
+    'Ǖ' => 'ǖ',
+    'Ǘ' => 'ǘ',
+    'Ǚ' => 'ǚ',
+    'Ǜ' => 'ǜ',
+    'Ǟ' => 'ǟ',
+    'Ǡ' => 'ǡ',
+    'Ǣ' => 'ǣ',
+    'Ǥ' => 'ǥ',
+    'Ǧ' => 'ǧ',
+    'Ǩ' => 'ǩ',
+    'Ǫ' => 'ǫ',
+    'Ǭ' => 'ǭ',
+    'Ǯ' => 'ǯ',
+    'ǲ' => 'ǳ',
+    'Ǵ' => 'ǵ',
+    'Ƕ' => 'ƕ',
+    'Ƿ' => 'ƿ',
+    'Ǹ' => 'ǹ',
+    'Ǻ' => 'ǻ',
+    'Ǽ' => 'ǽ',
+    'Ǿ' => 'ǿ',
+    'Ȁ' => 'ȁ',
+    'Ȃ' => 'ȃ',
+    'Ȅ' => 'ȅ',
+    'Ȇ' => 'ȇ',
+    'Ȉ' => 'ȉ',
+    'Ȋ' => 'ȋ',
+    'Ȍ' => 'ȍ',
+    'Ȏ' => 'ȏ',
+    'Ȑ' => 'ȑ',
+    'Ȓ' => 'ȓ',
+    'Ȕ' => 'ȕ',
+    'Ȗ' => 'ȗ',
+    'Ș' => 'ș',
+    'Ț' => 'ț',
+    'Ȝ' => 'ȝ',
+    'Ȟ' => 'ȟ',
+    'Ƞ' => 'ƞ',
+    'Ȣ' => 'ȣ',
+    'Ȥ' => 'ȥ',
+    'Ȧ' => 'ȧ',
+    'Ȩ' => 'ȩ',
+    'Ȫ' => 'ȫ',
+    'Ȭ' => 'ȭ',
+    'Ȯ' => 'ȯ',
+    'Ȱ' => 'ȱ',
+    'Ȳ' => 'ȳ',
+    'Ά' => 'ά',
+    'Έ' => 'έ',
+    'Ή' => 'ή',
+    'Ί' => 'ί',
+    'Ό' => 'ό',
+    'Ύ' => 'ύ',
+    'Ώ' => 'ώ',
+    'Α' => 'α',
+    'Β' => 'β',
+    'Γ' => 'γ',
+    'Δ' => 'δ',
+    'Ε' => 'ε',
+    'Ζ' => 'ζ',
+    'Η' => 'η',
+    'Θ' => 'θ',
+    'Ι' => 'ι',
+    'Κ' => 'κ',
+    'Λ' => 'λ',
+    'Μ' => 'μ',
+    'Ν' => 'ν',
+    'Ξ' => 'ξ',
+    'Ο' => 'ο',
+    'Π' => 'π',
+    'Ρ' => 'ρ',
+    'Σ' => 'σ',
+    'Τ' => 'τ',
+    'Υ' => 'υ',
+    'Φ' => 'φ',
+    'Χ' => 'χ',
+    'Ψ' => 'ψ',
+    'Ω' => 'ω',
+    'Ϊ' => 'ϊ',
+    'Ϋ' => 'ϋ',
+    'Ϙ' => 'ϙ',
+    'Ϛ' => 'ϛ',
+    'Ϝ' => 'ϝ',
+    'Ϟ' => 'ϟ',
+    'Ϡ' => 'ϡ',
+    'Ϣ' => 'ϣ',
+    'Ϥ' => 'ϥ',
+    'Ϧ' => 'ϧ',
+    'Ϩ' => 'ϩ',
+    'Ϫ' => 'ϫ',
+    'Ϭ' => 'ϭ',
+    'Ϯ' => 'ϯ',
+    'Ѐ' => 'ѐ',
+    'Ё' => 'ё',
+    'Ђ' => 'ђ',
+    'Ѓ' => 'ѓ',
+    'Є' => 'є',
+    'Ѕ' => 'ѕ',
+    'І' => 'і',
+    'Ї' => 'ї',
+    'Ј' => 'ј',
+    'Љ' => 'љ',
+    'Њ' => 'њ',
+    'Ћ' => 'ћ',
+    'Ќ' => 'ќ',
+    'Ѝ' => 'ѝ',
+    'Ў' => 'ў',
+    'Џ' => 'џ',
+    'А' => 'а',
+    'Б' => 'б',
+    'В' => 'в',
+    'Г' => 'г',
+    'Д' => 'д',
+    'Е' => 'е',
+    'Ж' => 'ж',
+    'З' => 'з',
+    'И' => 'и',
+    'Й' => 'й',
+    'К' => 'к',
+    'Л' => 'л',
+    'М' => 'м',
+    'Н' => 'н',
+    'О' => 'о',
+    'П' => 'п',
+    'Р' => 'р',
+    'С' => 'с',
+    'Т' => 'т',
+    'У' => 'у',
+    'Ф' => 'ф',
+    'Х' => 'х',
+    'Ц' => 'ц',
+    'Ч' => 'ч',
+    'Ш' => 'ш',
+    'Щ' => 'щ',
+    'Ъ' => 'ъ',
+    'Ы' => 'ы',
+    'Ь' => 'ь',
+    'Э' => 'э',
+    'Ю' => 'ю',
+    'Я' => 'я',
+    'Ѡ' => 'ѡ',
+    'Ѣ' => 'ѣ',
+    'Ѥ' => 'ѥ',
+    'Ѧ' => 'ѧ',
+    'Ѩ' => 'ѩ',
+    'Ѫ' => 'ѫ',
+    'Ѭ' => 'ѭ',
+    'Ѯ' => 'ѯ',
+    'Ѱ' => 'ѱ',
+    'Ѳ' => 'ѳ',
+    'Ѵ' => 'ѵ',
+    'Ѷ' => 'ѷ',
+    'Ѹ' => 'ѹ',
+    'Ѻ' => 'ѻ',
+    'Ѽ' => 'ѽ',
+    'Ѿ' => 'ѿ',
+    'Ҁ' => 'ҁ',
+    'Ҋ' => 'ҋ',
+    'Ҍ' => 'ҍ',
+    'Ҏ' => 'ҏ',
+    'Ґ' => 'ґ',
+    'Ғ' => 'ғ',
+    'Ҕ' => 'ҕ',
+    'Җ' => 'җ',
+    'Ҙ' => 'ҙ',
+    'Қ' => 'қ',
+    'Ҝ' => 'ҝ',
+    'Ҟ' => 'ҟ',
+    'Ҡ' => 'ҡ',
+    'Ң' => 'ң',
+    'Ҥ' => 'ҥ',
+    'Ҧ' => 'ҧ',
+    'Ҩ' => 'ҩ',
+    'Ҫ' => 'ҫ',
+    'Ҭ' => 'ҭ',
+    'Ү' => 'ү',
+    'Ұ' => 'ұ',
+    'Ҳ' => 'ҳ',
+    'Ҵ' => 'ҵ',
+    'Ҷ' => 'ҷ',
+    'Ҹ' => 'ҹ',
+    'Һ' => 'һ',
+    'Ҽ' => 'ҽ',
+    'Ҿ' => 'ҿ',
+    'Ӂ' => 'ӂ',
+    'Ӄ' => 'ӄ',
+    'Ӆ' => 'ӆ',
+    'Ӈ' => 'ӈ',
+    'Ӊ' => 'ӊ',
+    'Ӌ' => 'ӌ',
+    'Ӎ' => 'ӎ',
+    'Ӑ' => 'ӑ',
+    'Ӓ' => 'ӓ',
+    'Ӕ' => 'ӕ',
+    'Ӗ' => 'ӗ',
+    'Ә' => 'ә',
+    'Ӛ' => 'ӛ',
+    'Ӝ' => 'ӝ',
+    'Ӟ' => 'ӟ',
+    'Ӡ' => 'ӡ',
+    'Ӣ' => 'ӣ',
+    'Ӥ' => 'ӥ',
+    'Ӧ' => 'ӧ',
+    'Ө' => 'ө',
+    'Ӫ' => 'ӫ',
+    'Ӭ' => 'ӭ',
+    'Ӯ' => 'ӯ',
+    'Ӱ' => 'ӱ',
+    'Ӳ' => 'ӳ',
+    'Ӵ' => 'ӵ',
+    'Ӹ' => 'ӹ',
+    'Ԁ' => 'ԁ',
+    'Ԃ' => 'ԃ',
+    'Ԅ' => 'ԅ',
+    'Ԇ' => 'ԇ',
+    'Ԉ' => 'ԉ',
+    'Ԋ' => 'ԋ',
+    'Ԍ' => 'ԍ',
+    'Ԏ' => 'ԏ',
+    'Ա' => 'ա',
+    'Բ' => 'բ',
+    'Գ' => 'գ',
+    'Դ' => 'դ',
+    'Ե' => 'ե',
+    'Զ' => 'զ',
+    'Է' => 'է',
+    'Ը' => 'ը',
+    'Թ' => 'թ',
+    'Ժ' => 'ժ',
+    'Ի' => 'ի',
+    'Լ' => 'լ',
+    'Խ' => 'խ',
+    'Ծ' => 'ծ',
+    'Կ' => 'կ',
+    'Հ' => 'հ',
+    'Ձ' => 'ձ',
+    'Ղ' => 'ղ',
+    'Ճ' => 'ճ',
+    'Մ' => 'մ',
+    'Յ' => 'յ',
+    'Ն' => 'ն',
+    'Շ' => 'շ',
+    'Ո' => 'ո',
+    'Չ' => 'չ',
+    'Պ' => 'պ',
+    'Ջ' => 'ջ',
+    'Ռ' => 'ռ',
+    'Ս' => 'ս',
+    'Վ' => 'վ',
+    'Տ' => 'տ',
+    'Ր' => 'ր',
+    'Ց' => 'ց',
+    'Ւ' => 'ւ',
+    'Փ' => 'փ',
+    'Ք' => 'ք',
+    'Օ' => 'օ',
+    'Ֆ' => 'ֆ',
+    'Ḁ' => 'ḁ',
+    'Ḃ' => 'ḃ',
+    'Ḅ' => 'ḅ',
+    'Ḇ' => 'ḇ',
+    'Ḉ' => 'ḉ',
+    'Ḋ' => 'ḋ',
+    'Ḍ' => 'ḍ',
+    'Ḏ' => 'ḏ',
+    'Ḑ' => 'ḑ',
+    'Ḓ' => 'ḓ',
+    'Ḕ' => 'ḕ',
+    'Ḗ' => 'ḗ',
+    'Ḙ' => 'ḙ',
+    'Ḛ' => 'ḛ',
+    'Ḝ' => 'ḝ',
+    'Ḟ' => 'ḟ',
+    'Ḡ' => 'ḡ',
+    'Ḣ' => 'ḣ',
+    'Ḥ' => 'ḥ',
+    'Ḧ' => 'ḧ',
+    'Ḩ' => 'ḩ',
+    'Ḫ' => 'ḫ',
+    'Ḭ' => 'ḭ',
+    'Ḯ' => 'ḯ',
+    'Ḱ' => 'ḱ',
+    'Ḳ' => 'ḳ',
+    'Ḵ' => 'ḵ',
+    'Ḷ' => 'ḷ',
+    'Ḹ' => 'ḹ',
+    'Ḻ' => 'ḻ',
+    'Ḽ' => 'ḽ',
+    'Ḿ' => 'ḿ',
+    'Ṁ' => 'ṁ',
+    'Ṃ' => 'ṃ',
+    'Ṅ' => 'ṅ',
+    'Ṇ' => 'ṇ',
+    'Ṉ' => 'ṉ',
+    'Ṋ' => 'ṋ',
+    'Ṍ' => 'ṍ',
+    'Ṏ' => 'ṏ',
+    'Ṑ' => 'ṑ',
+    'Ṓ' => 'ṓ',
+    'Ṕ' => 'ṕ',
+    'Ṗ' => 'ṗ',
+    'Ṙ' => 'ṙ',
+    'Ṛ' => 'ṛ',
+    'Ṝ' => 'ṝ',
+    'Ṟ' => 'ṟ',
+    'Ṡ' => 'ṡ',
+    'Ṣ' => 'ṣ',
+    'Ṥ' => 'ṥ',
+    'Ṧ' => 'ṧ',
+    'Ṩ' => 'ṩ',
+    'Ṫ' => 'ṫ',
+    'Ṭ' => 'ṭ',
+    'Ṯ' => 'ṯ',
+    'Ṱ' => 'ṱ',
+    'Ṳ' => 'ṳ',
+    'Ṵ' => 'ṵ',
+    'Ṷ' => 'ṷ',
+    'Ṹ' => 'ṹ',
+    'Ṻ' => 'ṻ',
+    'Ṽ' => 'ṽ',
+    'Ṿ' => 'ṿ',
+    'Ẁ' => 'ẁ',
+    'Ẃ' => 'ẃ',
+    'Ẅ' => 'ẅ',
+    'Ẇ' => 'ẇ',
+    'Ẉ' => 'ẉ',
+    'Ẋ' => 'ẋ',
+    'Ẍ' => 'ẍ',
+    'Ẏ' => 'ẏ',
+    'Ẑ' => 'ẑ',
+    'Ẓ' => 'ẓ',
+    'Ẕ' => 'ẕ',
+    'Ạ' => 'ạ',
+    'Ả' => 'ả',
+    'Ấ' => 'ấ',
+    'Ầ' => 'ầ',
+    'Ẩ' => 'ẩ',
+    'Ẫ' => 'ẫ',
+    'Ậ' => 'ậ',
+    'Ắ' => 'ắ',
+    'Ằ' => 'ằ',
+    'Ẳ' => 'ẳ',
+    'Ẵ' => 'ẵ',
+    'Ặ' => 'ặ',
+    'Ẹ' => 'ẹ',
+    'Ẻ' => 'ẻ',
+    'Ẽ' => 'ẽ',
+    'Ế' => 'ế',
+    'Ề' => 'ề',
+    'Ể' => 'ể',
+    'Ễ' => 'ễ',
+    'Ệ' => 'ệ',
+    'Ỉ' => 'ỉ',
+    'Ị' => 'ị',
+    'Ọ' => 'ọ',
+    'Ỏ' => 'ỏ',
+    'Ố' => 'ố',
+    'Ồ' => 'ồ',
+    'Ổ' => 'ổ',
+    'Ỗ' => 'ỗ',
+    'Ộ' => 'ộ',
+    'Ớ' => 'ớ',
+    'Ờ' => 'ờ',
+    'Ở' => 'ở',
+    'Ỡ' => 'ỡ',
+    'Ợ' => 'ợ',
+    'Ụ' => 'ụ',
+    'Ủ' => 'ủ',
+    'Ứ' => 'ứ',
+    'Ừ' => 'ừ',
+    'Ử' => 'ử',
+    'Ữ' => 'ữ',
+    'Ự' => 'ự',
+    'Ỳ' => 'ỳ',
+    'Ỵ' => 'ỵ',
+    'Ỷ' => 'ỷ',
+    'Ỹ' => 'ỹ',
+    'Ἀ' => 'ἀ',
+    'Ἁ' => 'ἁ',
+    'Ἂ' => 'ἂ',
+    'Ἃ' => 'ἃ',
+    'Ἄ' => 'ἄ',
+    'Ἅ' => 'ἅ',
+    'Ἆ' => 'ἆ',
+    'Ἇ' => 'ἇ',
+    'Ἐ' => 'ἐ',
+    'Ἑ' => 'ἑ',
+    'Ἒ' => 'ἒ',
+    'Ἓ' => 'ἓ',
+    'Ἔ' => 'ἔ',
+    'Ἕ' => 'ἕ',
+    'Ἡ' => 'ἡ',
+    'Ἢ' => 'ἢ',
+    'Ἣ' => 'ἣ',
+    'Ἤ' => 'ἤ',
+    'Ἥ' => 'ἥ',
+    'Ἦ' => 'ἦ',
+    'Ἧ' => 'ἧ',
+    'Ἰ' => 'ἰ',
+    'Ἱ' => 'ἱ',
+    'Ἲ' => 'ἲ',
+    'Ἳ' => 'ἳ',
+    'Ἴ' => 'ἴ',
+    'Ἵ' => 'ἵ',
+    'Ἶ' => 'ἶ',
+    'Ἷ' => 'ἷ',
+    'Ὀ' => 'ὀ',
+    'Ὁ' => 'ὁ',
+    'Ὂ' => 'ὂ',
+    'Ὃ' => 'ὃ',
+    'Ὄ' => 'ὄ',
+    'Ὅ' => 'ὅ',
+    'Ὑ' => 'ὑ',
+    'Ὓ' => 'ὓ',
+    'Ὕ' => 'ὕ',
+    'Ὗ' => 'ὗ',
+    'Ὡ' => 'ὡ',
+    'Ὢ' => 'ὢ',
+    'Ὣ' => 'ὣ',
+    'Ὤ' => 'ὤ',
+    'Ὥ' => 'ὥ',
+    'Ὦ' => 'ὦ',
+    'Ὧ' => 'ὧ',
+    'ᾈ' => 'ᾀ',
+    'ᾉ' => 'ᾁ',
+    'ᾊ' => 'ᾂ',
+    'ᾋ' => 'ᾃ',
+    'ᾌ' => 'ᾄ',
+    'ᾍ' => 'ᾅ',
+    'ᾎ' => 'ᾆ',
+    'ᾏ' => 'ᾇ',
+    'ᾘ' => 'ᾐ',
+    'ᾙ' => 'ᾑ',
+    'ᾚ' => 'ᾒ',
+    'ᾛ' => 'ᾓ',
+    'ᾜ' => 'ᾔ',
+    'ᾝ' => 'ᾕ',
+    'ᾞ' => 'ᾖ',
+    'ᾟ' => 'ᾗ',
+    'ᾩ' => 'ᾡ',
+    'ᾪ' => 'ᾢ',
+    'ᾫ' => 'ᾣ',
+    'ᾬ' => 'ᾤ',
+    'ᾭ' => 'ᾥ',
+    'ᾮ' => 'ᾦ',
+    'ᾯ' => 'ᾧ',
+    'Ᾰ' => 'ᾰ',
+    'Ᾱ' => 'ᾱ',
+    'Ὰ' => 'ὰ',
+    'ᾼ' => 'ᾳ',
+    'Ὲ' => 'ὲ',
+    'Ὴ' => 'ὴ',
+    'ῌ' => 'ῃ',
+    'Ῐ' => 'ῐ',
+    'Ῑ' => 'ῑ',
+    'Ὶ' => 'ὶ',
+    'Ῡ' => 'ῡ',
+    'Ὺ' => 'ὺ',
+    'Ῥ' => 'ῥ',
+    'Ὸ' => 'ὸ',
+    'Ὼ' => 'ὼ',
+    'ῼ' => 'ῳ',
+    'Ａ' => 'ａ',
+    'Ｂ' => 'ｂ',
+    'Ｃ' => 'ｃ',
+    'Ｄ' => 'ｄ',
+    'Ｅ' => 'ｅ',
+    'Ｆ' => 'ｆ',
+    'Ｇ' => 'ｇ',
+    'Ｈ' => 'ｈ',
+    'Ｉ' => 'ｉ',
+    'Ｊ' => 'ｊ',
+    'Ｋ' => 'ｋ',
+    'Ｌ' => 'ｌ',
+    'Ｍ' => 'ｍ',
+    'Ｎ' => 'ｎ',
+    'Ｏ' => 'ｏ',
+    'Ｐ' => 'ｐ',
+    'Ｑ' => 'ｑ',
+    'Ｒ' => 'ｒ',
+    'Ｓ' => 'ｓ',
+    'Ｔ' => 'ｔ',
+    'Ｕ' => 'ｕ',
+    'Ｖ' => 'ｖ',
+    'Ｗ' => 'ｗ',
+    'Ｘ' => 'ｘ',
+    'Ｙ' => 'ｙ',
+    'Ｚ' => 'ｚ',
+];
--- a/ap23/web/doku/inc/Utf8/tables/loweraccents.php
+++ b/ap23/web/doku/inc/Utf8/tables/loweraccents.php
@@ -0,0 +1,116 @@
+<?php
+/**
+ * UTF-8 lookup table for lower case accented letters
+ *
+ * This lookup table defines replacements from the ASCII-7 range for accented
+ * characters. These are lower case letters only.
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    \dokuwiki\Utf8\Clean::deaccent()
+ */
+return [
+    'á' => 'a',
+    'à' => 'a',
+    'ă' => 'a',
+    'â' => 'a',
+    'å' => 'a',
+    'ä' => 'ae',
+    'ã' => 'a',
+    'ą' => 'a',
+    'ā' => 'a',
+    'æ' => 'ae',
+    'ḃ' => 'b',
+    'ć' => 'c',
+    'ĉ' => 'c',
+    'č' => 'c',
+    'ċ' => 'c',
+    'ç' => 'c',
+    'ď' => 'd',
+    'ḋ' => 'd',
+    'đ' => 'd',
+    'ð' => 'dh',
+    'é' => 'e',
+    'è' => 'e',
+    'ĕ' => 'e',
+    'ê' => 'e',
+    'ě' => 'e',
+    'ë' => 'e',
+    'ė' => 'e',
+    'ę' => 'e',
+    'ē' => 'e',
+    'ḟ' => 'f',
+    'ƒ' => 'f',
+    'ğ' => 'g',
+    'ĝ' => 'g',
+    'ġ' => 'g',
+    'ģ' => 'g',
+    'ĥ' => 'h',
+    'ħ' => 'h',
+    'í' => 'i',
+    'ì' => 'i',
+    'î' => 'i',
+    'ï' => 'i',
+    'ĩ' => 'i',
+    'į' => 'i',
+    'ī' => 'i',
+    'ĵ' => 'j',
+    'ķ' => 'k',
+    'ĺ' => 'l',
+    'ľ' => 'l',
+    'ļ' => 'l',
+    'ł' => 'l',
+    'ṁ' => 'm',
+    'ń' => 'n',
+    'ň' => 'n',
+    'ñ' => 'n',
+    'ņ' => 'n',
+    'ó' => 'o',
+    'ò' => 'o',
+    'ô' => 'o',
+    'ö' => 'oe',
+    'ő' => 'o',
+    'õ' => 'o',
+    'ø' => 'o',
+    'ō' => 'o',
+    'ơ' => 'o',
+    'ṗ' => 'p',
+    'ŕ' => 'r',
+    'ř' => 'r',
+    'ŗ' => 'r',
+    'ś' => 's',
+    'ŝ' => 's',
+    'š' => 's',
+    'ṡ' => 's',
+    'ş' => 's',
+    'ș' => 's',
+    'ß' => 'ss',
+    'ť' => 't',
+    'ṫ' => 't',
+    'ţ' => 't',
+    'ț' => 't',
+    'ŧ' => 't',
+    'ú' => 'u',
+    'ù' => 'u',
+    'ŭ' => 'u',
+    'û' => 'u',
+    'ů' => 'u',
+    'ü' => 'ue',
+    'ű' => 'u',
+    'ũ' => 'u',
+    'ų' => 'u',
+    'ū' => 'u',
+    'ư' => 'u',
+    'ẃ' => 'w',
+    'ẁ' => 'w',
+    'ŵ' => 'w',
+    'ẅ' => 'w',
+    'ý' => 'y',
+    'ỳ' => 'y',
+    'ŷ' => 'y',
+    'ÿ' => 'y',
+    'ź' => 'z',
+    'ž' => 'z',
+    'ż' => 'z',
+    'þ' => 'th',
+    'µ' => 'u',
+];
--- a/ap23/web/doku/inc/Utf8/tables/romanization.php
+++ b/ap23/web/doku/inc/Utf8/tables/romanization.php
--- a/ap23/web/doku/inc/Utf8/tables/specials.php
+++ b/ap23/web/doku/inc/Utf8/tables/specials.php
@@ -0,0 +1,615 @@
+<?php
+/**
+ * UTF-8 array of common special characters
+ *
+ * This array should contain all special characters (not a letter or digit)
+ * defined in the various local charsets - it's not a complete list of non-alphanum
+ * characters in UTF-8. It's not perfect but should match most cases of special
+ * chars.
+ *
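+ * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
+ * These chars are _not_ in the array either:  _ (0x5f), : 0x3a, . 0x2e, - 0x2d, * 0x2a
+ *
+ * NOTE(review): the entries are Unicode code points, but the last five (0xc2a0, 0xe28087,
+ * 0xe280af, 0xe281a0, 0xefbbbf) look like UTF-8 byte sequences written as integers
+ * (presumably U+00A0, U+2007, U+202F, U+2060 and U+FEFF) - confirm and correct.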
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    \dokuwiki\Utf8\Clean::stripspecials()
+ */
+return [
+    0x1a, // 
+    0x1b, // 
+    0x1c, // 
+    0x1d, // 
+    0x1e, // 
+    0x1f, // 
+    0x20, // <space>
+    0x21, // !
+    0x22, // "
+    0x23, // #
+    0x24, // $
+    0x25, // %
+    0x26, // &
+    0x27, // '
+    0x28, // (
+    0x29, // )
+    0x2b, // +
+    0x2c, // ,
+    0x2f, // /
+    0x3b, // ;
+    0x3c, // <
+    0x3d, // =
+    0x3e, // >
+    0x3f, // ?
+    0x40, // @
+    0x5b, // [
+    0x5c, // \
+    0x5d, // ]
+    0x5e, // ^
+    0x60, // `
+    0x7b, // {
+    0x7c, // |
+    0x7d, // }
+    0x7e, // ~
+    0x7f, // 
+    0x80, // 
+    0x81, // 
+    0x82, // 
+    0x83, // 
+    0x84, // 
+    0x85, // 
+    0x86, // 
+    0x87, // 
+    0x88, // 
+    0x89, // 
+    0x8a, // 
+    0x8b, // 
+    0x8c, // 
+    0x8d, // 
+    0x8e, // 
+    0x8f, // 
+    0x90, // 
+    0x91, // 
+    0x92, // 
+    0x93, // 
+    0x94, // 
+    0x95, // 
+    0x96, // 
+    0x97, // 
+    0x98, // 
+    0x99, // 
+    0x9a, // 
+    0x9b, // 
+    0x9c, // 
+    0x9d, // 
+    0x9e, // 
+    0x9f, // 
+    0xa0, //  
+    0xa1, // ¡
+    0xa2, // ¢
+    0xa3, // £
+    0xa4, // ¤
+    0xa5, // ¥
+    0xa6, // ¦
+    0xa7, // §
+    0xa8, // ¨
+    0xa9, // ©
+    0xaa, // ª
+    0xab, // «
+    0xac, // ¬
+    0xad, // 
+    0xae, // ®
+    0xaf, // ¯
+    0xb0, // °
+    0xb1, // ±
+    0xb2, // ²
+    0xb3, // ³
+    0xb4, // ´
+    0xb5, // µ
+    0xb6, // ¶
+    0xb7, // ·
+    0xb8, // ¸
+    0xb9, // ¹
+    0xba, // º
+    0xbb, // »
+    0xbc, // ¼
+    0xbd, // ½
+    0xbe, // ¾
+    0xbf, // ¿
+    0xd7, // ×
+    0xf7, // ÷
+    0x2c7, // ˇ
+    0x2d8, // ˘
+    0x2d9, // ˙
+    0x2da, // ˚
+    0x2db, // ˛
+    0x2dc, // ˜
+    0x2dd, // ˝
+    0x300, // ̀
+    0x301, // ́
+    0x303, // ̃
+    0x309, // ̉
+    0x323, // ̣
+    0x384, // ΄
+    0x385, // ΅
+    0x387, // ·
+    0x5b0, // ְ
+    0x5b1, // ֱ
+    0x5b2, // ֲ
+    0x5b3, // ֳ
+    0x5b4, // ִ
+    0x5b5, // ֵ
+    0x5b6, // ֶ
+    0x5b7, // ַ
+    0x5b8, // ָ
+    0x5b9, // ֹ
+    0x5bb, // ֻ
+    0x5bc, // ּ
+    0x5bd, // ֽ
+    0x5be, // ־
+    0x5bf, // ֿ
+    0x5c0, // ׀
+    0x5c1, // ׁ
+    0x5c2, // ׂ
+    0x5c3, // ׃
+    0x5f3, // ׳
+    0x5f4, // ״
+    0x60c, // ،
+    0x61b, // ؛
+    0x61f, // ؟
+    0x640, // ـ
+    0x64b, // ً
+    0x64c, // ٌ
+    0x64d, // ٍ
+    0x64e, // َ
+    0x64f, // ُ
+    0x650, // ِ
+    0x651, // ّ
+    0x652, // ْ
+    0x66a, // ٪
+    0xe3f, // ฿
+    0x200c, // ‌
+    0x200d, // ‍
+    0x200e, // ‎
+    0x200f, // ‏
+    0x2013, // –
+    0x2014, // —
+    0x2015, // ―
+    0x2017, // ‗
+    0x2018, // ‘
+    0x2019, // ’
+    0x201a, // ‚
+    0x201c, // “
+    0x201d, // ”
+    0x201e, // „
+    0x2020, // †
+    0x2021, // ‡
+    0x2022, // •
+    0x2026, // …
+    0x2030, // ‰
+    0x2032, // ′
+    0x2033, // ″
+    0x2039, // ‹
+    0x203a, // ›
+    0x2044, // ⁄
+    0x20a7, // ₧
+    0x20aa, // ₪
+    0x20ab, // ₫
+    0x20ac, // €
+    0x2116, // №
+    0x2118, // ℘
+    0x2122, // ™
+    0x2126, // Ω
+    0x2135, // ℵ
+    0x2190, // ←
+    0x2191, // ↑
+    0x2192, // →
+    0x2193, // ↓
+    0x2194, // ↔
+    0x2195, // ↕
+    0x21b5, // ↵
+    0x21d0, // ⇐
+    0x21d1, // ⇑
+    0x21d2, // ⇒
+    0x21d3, // ⇓
+    0x21d4, // ⇔
+    0x2200, // ∀
+    0x2202, // ∂
+    0x2203, // ∃
+    0x2205, // ∅
+    0x2206, // ∆
+    0x2207, // ∇
+    0x2208, // ∈
+    0x2209, // ∉
+    0x220b, // ∋
+    0x220f, // ∏
+    0x2211, // ∑
+    0x2212, // −
+    0x2215, // ∕
+    0x2217, // ∗
+    0x2219, // ∙
+    0x221a, // √
+    0x221d, // ∝
+    0x221e, // ∞
+    0x2220, // ∠
+    0x2227, // ∧
+    0x2228, // ∨
+    0x2229, // ∩
+    0x222a, // ∪
+    0x222b, // ∫
+    0x2234, // ∴
+    0x223c, // ∼
+    0x2245, // ≅
+    0x2248, // ≈
+    0x2260, // ≠
+    0x2261, // ≡
+    0x2264, // ≤
+    0x2265, // ≥
+    0x2282, // ⊂
+    0x2283, // ⊃
+    0x2284, // ⊄
+    0x2286, // ⊆
+    0x2287, // ⊇
+    0x2295, // ⊕
+    0x2297, // ⊗
+    0x22a5, // ⊥
+    0x22c5, // ⋅
+    0x2310, // ⌐
+    0x2320, // ⌠
+    0x2321, // ⌡
+    0x2329, // 〈
+    0x232a, // 〉
+    0x2469, // ⑩
+    0x2500, // ─
+    0x2502, // │
+    0x250c, // ┌
+    0x2510, // ┐
+    0x2514, // └
+    0x2518, // ┘
+    0x251c, // ├
+    0x2524, // ┤
+    0x252c, // ┬
+    0x2534, // ┴
+    0x253c, // ┼
+    0x2550, // ═
+    0x2551, // ║
+    0x2552, // ╒
+    0x2553, // ╓
+    0x2554, // ╔
+    0x2555, // ╕
+    0x2556, // ╖
+    0x2557, // ╗
+    0x2558, // ╘
+    0x2559, // ╙
+    0x255a, // ╚
+    0x255b, // ╛
+    0x255c, // ╜
+    0x255d, // ╝
+    0x255e, // ╞
+    0x255f, // ╟
+    0x2560, // ╠
+    0x2561, // ╡
+    0x2562, // ╢
+    0x2563, // ╣
+    0x2564, // ╤
+    0x2565, // ╥
+    0x2566, // ╦
+    0x2567, // ╧
+    0x2568, // ╨
+    0x2569, // ╩
+    0x256a, // ╪
+    0x256b, // ╫
+    0x256c, // ╬
+    0x2580, // ▀
+    0x2584, // ▄
+    0x2588, // █
+    0x258c, // ▌
+    0x2590, // ▐
+    0x2591, // ░
+    0x2592, // ▒
+    0x2593, // ▓
+    0x25a0, // ■
+    0x25b2, // ▲
+    0x25bc, // ▼
+    0x25c6, // ◆
+    0x25ca, // ◊
+    0x25cf, // ●
+    0x25d7, // ◗
+    0x2605, // ★
+    0x260e, // ☎
+    0x261b, // ☛
+    0x261e, // ☞
+    0x2660, // ♠
+    0x2663, // ♣
+    0x2665, // ♥
+    0x2666, // ♦
+    0x2701, // ✁
+    0x2702, // ✂
+    0x2703, // ✃
+    0x2704, // ✄
+    0x2706, // ✆
+    0x2707, // ✇
+    0x2708, // ✈
+    0x2709, // ✉
+    0x270c, // ✌
+    0x270d, // ✍
+    0x270e, // ✎
+    0x270f, // ✏
+    0x2710, // ✐
+    0x2711, // ✑
+    0x2712, // ✒
+    0x2713, // ✓
+    0x2714, // ✔
+    0x2715, // ✕
+    0x2716, // ✖
+    0x2717, // ✗
+    0x2718, // ✘
+    0x2719, // ✙
+    0x271a, // ✚
+    0x271b, // ✛
+    0x271c, // ✜
+    0x271d, // ✝
+    0x271e, // ✞
+    0x271f, // ✟
+    0x2720, // ✠
+    0x2721, // ✡
+    0x2722, // ✢
+    0x2723, // ✣
+    0x2724, // ✤
+    0x2725, // ✥
+    0x2726, // ✦
+    0x2727, // ✧
+    0x2729, // ✩
+    0x272a, // ✪
+    0x272b, // ✫
+    0x272c, // ✬
+    0x272d, // ✭
+    0x272e, // ✮
+    0x272f, // ✯
+    0x2730, // ✰
+    0x2731, // ✱
+    0x2732, // ✲
+    0x2733, // ✳
+    0x2734, // ✴
+    0x2735, // ✵
+    0x2736, // ✶
+    0x2737, // ✷
+    0x2738, // ✸
+    0x2739, // ✹
+    0x273a, // ✺
+    0x273b, // ✻
+    0x273c, // ✼
+    0x273d, // ✽
+    0x273e, // ✾
+    0x273f, // ✿
+    0x2740, // ❀
+    0x2741, // ❁
+    0x2742, // ❂
+    0x2743, // ❃
+    0x2744, // ❄
+    0x2745, // ❅
+    0x2746, // ❆
+    0x2747, // ❇
+    0x2748, // ❈
+    0x2749, // ❉
+    0x274a, // ❊
+    0x274b, // ❋
+    0x274d, // ❍
+    0x274f, // ❏
+    0x2750, // ❐
+    0x2751, // ❑
+    0x2752, // ❒
+    0x2756, // ❖
+    0x2758, // ❘
+    0x2759, // ❙
+    0x275a, // ❚
+    0x275b, // ❛
+    0x275c, // ❜
+    0x275d, // ❝
+    0x275e, // ❞
+    0x2761, // ❡
+    0x2762, // ❢
+    0x2763, // ❣
+    0x2764, // ❤
+    0x2765, // ❥
+    0x2766, // ❦
+    0x2767, // ❧
+    0x277f, // ❿
+    0x2789, // ➉
+    0x2793, // ➓
+    0x2794, // ➔
+    0x2798, // ➘
+    0x2799, // ➙
+    0x279a, // ➚
+    0x279b, // ➛
+    0x279c, // ➜
+    0x279d, // ➝
+    0x279e, // ➞
+    0x279f, // ➟
+    0x27a0, // ➠
+    0x27a1, // ➡
+    0x27a2, // ➢
+    0x27a3, // ➣
+    0x27a4, // ➤
+    0x27a5, // ➥
+    0x27a6, // ➦
+    0x27a7, // ➧
+    0x27a8, // ➨
+    0x27a9, // ➩
+    0x27aa, // ➪
+    0x27ab, // ➫
+    0x27ac, // ➬
+    0x27ad, // ➭
+    0x27ae, // ➮
+    0x27af, // ➯
+    0x27b1, // ➱
+    0x27b2, // ➲
+    0x27b3, // ➳
+    0x27b4, // ➴
+    0x27b5, // ➵
+    0x27b6, // ➶
+    0x27b7, // ➷
+    0x27b8, // ➸
+    0x27b9, // ➹
+    0x27ba, // ➺
+    0x27bb, // ➻
+    0x27bc, // ➼
+    0x27bd, // ➽
+    0x27be, // ➾
+    0x3000, // 　
+    0x3001, // 、
+    0x3002, // 。
+    0x3003, // 〃
+    0x3008, // 〈
+    0x3009, // 〉
+    0x300a, // 《
+    0x300b, // 》
+    0x300c, // 「
+    0x300d, // 」
+    0x300e, // 『
+    0x300f, // 』
+    0x3010, // 【
+    0x3011, // 】
+    0x3012, // 〒
+    0x3014, // 〔
+    0x3015, // 〕
+    0x3016, // 〖
+    0x3017, // 〗
+    0x3018, // 〘
+    0x3019, // 〙
+    0x301a, // 〚
+    0x301b, // 〛
+    0x3036, // 〶
+    0xf6d9, // 
+    0xf6da, // 
+    0xf6db, // 
+    0xf8d7, // 
+    0xf8d8, // 
+    0xf8d9, // 
+    0xf8da, // 
+    0xf8db, // 
+    0xf8dc, // 
+    0xf8dd, // 
+    0xf8de, // 
+    0xf8df, // 
+    0xf8e0, // 
+    0xf8e1, // 
+    0xf8e2, // 
+    0xf8e3, // 
+    0xf8e4, // 
+    0xf8e5, // 
+    0xf8e6, // 
+    0xf8e7, // 
+    0xf8e8, // 
+    0xf8e9, // 
+    0xf8ea, // 
+    0xf8eb, // 
+    0xf8ec, // 
+    0xf8ed, // 
+    0xf8ee, // 
+    0xf8ef, // 
+    0xf8f0, // 
+    0xf8f1, // 
+    0xf8f2, // 
+    0xf8f3, // 
+    0xf8f4, // 
+    0xf8f5, // 
+    0xf8f6, // 
+    0xf8f7, // 
+    0xf8f8, // 
+    0xf8f9, // 
+    0xf8fa, // 
+    0xf8fb, // 
+    0xf8fc, // 
+    0xf8fd, // 
+    0xf8fe, // 
+    0xfe7c, // ﹼ
+    0xfe7d, // ﹽ
+    0xff01, // ！
+    0xff02, // ＂
+    0xff03, // ＃
+    0xff04, // ＄
+    0xff05, // ％
+    0xff06, // ＆
+    0xff07, // ＇
+    0xff08, // （
+    0xff09, // ）
+    0xff09, // ）
+    0xff0a, // ＊
+    0xff0b, // ＋
+    0xff0c, // ，
+    0xff0d, // －
+    0xff0e, // ．
+    0xff0f, // ／
+    0xff1a, // ：
+    0xff1b, // ；
+    0xff1c, // ＜
+    0xff1d, // ＝
+    0xff1e, // ＞
+    0xff1f, // ？
+    0xff20, // ＠
+    0xff3b, // ［
+    0xff3c, // ＼
+    0xff3d, // ］
+    0xff3e, // ＾
+    0xff40, // ｀
+    0xff5b, // ｛
+    0xff5c, // ｜
+    0xff5d, // ｝
+    0xff5e, // ～
+    0xff5f, // ｟
+    0xff60, // ｠
+    0xff61, // ｡
+    0xff62, // ｢
+    0xff63, // ｣
+    0xff64, // ､
+    0xff65, // ･
+    0xffe0, // ￠
+    0xffe1, // ￡
+    0xffe2, // ￢
+    0xffe3, // ￣
+    0xffe4, // ￤
+    0xffe5, // ￥
+    0xffe6, // ￦
+    0xffe8, // ￨
+    0xffe9, // ￩
+    0xffea, // ￪
+    0xffeb, // ￫
+    0xffec, // ￬
+    0xffed, // ￭
+    0xffee, // ￮
+    0x1d6fc, // 𝛼
+    0x1d6fd, // 𝛽
+    0x1d6fe, // 𝛾
+    0x1d6ff, // 𝛿
+    0x1d700, // 𝜀
+    0x1d701, // 𝜁
+    0x1d702, // 𝜂
+    0x1d703, // 𝜃
+    0x1d704, // 𝜄
+    0x1d705, // 𝜅
+    0x1d706, // 𝜆
+    0x1d707, // 𝜇
+    0x1d708, // 𝜈
+    0x1d709, // 𝜉
+    0x1d70a, // 𝜊
+    0x1d70b, // 𝜋
+    0x1d70c, // 𝜌
+    0x1d70d, // 𝜍
+    0x1d70e, // 𝜎
+    0x1d70f, // 𝜏
+    0x1d710, // 𝜐
+    0x1d711, // 𝜑
+    0x1d712, // 𝜒
+    0x1d713, // 𝜓
+    0x1d714, // 𝜔
+    0x1d715, // 𝜕
+    0x1d716, // 𝜖
+    0x1d717, // 𝜗
+    0x1d718, // 𝜘
+    0x1d719, // 𝜙
+    0x1d71a, // 𝜚
+    0x1d71b, // 𝜛
+    0xc2a0, // 슠
+    0xe28087, //
+    0xe280af, //
+    0xe281a0, //
+    0xefbbbf, //
+];
--- a/ap23/web/doku/inc/Utf8/tables/upperaccents.php
+++ b/ap23/web/doku/inc/Utf8/tables/upperaccents.php
@@ -0,0 +1,114 @@
+<?php
+/**
+ * UTF-8 lookup table for upper case accented letters
+ *
+ * This lookup table defines ASCII-7 replacements for accented characters.
+ * These are upper case letters only.
+ *
+ * @author Andreas Gohr <andi@splitbrain.org>
+ * @see    \dokuwiki\Utf8\Clean::deaccent()
+ */
+return [ // key: accented upper case letter, value: its plain ASCII replacement
+    'Á' => 'A',
+    'À' => 'A',
+    'Ă' => 'A',
+    'Â' => 'A',
+    'Å' => 'A',
+    'Ä' => 'Ae', // replacement is two ASCII letters, not one
+    'Ã' => 'A',
+    'Ą' => 'A',
+    'Ā' => 'A',
+    'Æ' => 'Ae', // replacement is two ASCII letters, not one
+    'Ḃ' => 'B',
+    'Ć' => 'C',
+    'Ĉ' => 'C',
+    'Č' => 'C',
+    'Ċ' => 'C',
+    'Ç' => 'C',
+    'Ď' => 'D',
+    'Ḋ' => 'D',
+    'Đ' => 'D',
+    'Ð' => 'Dh', // replacement is two ASCII letters, not one
+    'É' => 'E',
+    'È' => 'E',
+    'Ĕ' => 'E',
+    'Ê' => 'E',
+    'Ě' => 'E',
+    'Ë' => 'E',
+    'Ė' => 'E',
+    'Ę' => 'E',
+    'Ē' => 'E',
+    'Ḟ' => 'F',
+    'Ƒ' => 'F',
+    'Ğ' => 'G',
+    'Ĝ' => 'G',
+    'Ġ' => 'G',
+    'Ģ' => 'G',
+    'Ĥ' => 'H',
+    'Ħ' => 'H',
+    'Í' => 'I',
+    'Ì' => 'I',
+    'Î' => 'I',
+    'Ï' => 'I',
+    'Ĩ' => 'I',
+    'Į' => 'I',
+    'Ī' => 'I',
+    'Ĵ' => 'J',
+    'Ķ' => 'K',
+    'Ĺ' => 'L',
+    'Ľ' => 'L',
+    'Ļ' => 'L',
+    'Ł' => 'L',
+    'Ṁ' => 'M',
+    'Ń' => 'N',
+    'Ň' => 'N',
+    'Ñ' => 'N',
+    'Ņ' => 'N',
+    'Ó' => 'O',
+    'Ò' => 'O',
+    'Ô' => 'O',
+    'Ö' => 'Oe', // replacement is two ASCII letters, not one
+    'Ő' => 'O',
+    'Õ' => 'O',
+    'Ø' => 'O',
+    'Ō' => 'O',
+    'Ơ' => 'O',
+    'Ṗ' => 'P',
+    'Ŕ' => 'R',
+    'Ř' => 'R',
+    'Ŗ' => 'R',
+    'Ś' => 'S',
+    'Ŝ' => 'S',
+    'Š' => 'S',
+    'Ṡ' => 'S',
+    'Ş' => 'S',
+    'Ș' => 'S',
+    'Ť' => 'T',
+    'Ṫ' => 'T',
+    'Ţ' => 'T',
+    'Ț' => 'T',
+    'Ŧ' => 'T',
+    'Ú' => 'U',
+    'Ù' => 'U',
+    'Ŭ' => 'U',
+    'Û' => 'U',
+    'Ů' => 'U',
+    'Ü' => 'Ue', // replacement is two ASCII letters, not one
+    'Ű' => 'U',
+    'Ũ' => 'U',
+    'Ų' => 'U',
+    'Ū' => 'U',
+    'Ư' => 'U',
+    'Ẃ' => 'W',
+    'Ẁ' => 'W',
+    'Ŵ' => 'W',
+    'Ẅ' => 'W',
+    'Ý' => 'Y',
+    'Ỳ' => 'Y',
+    'Ŷ' => 'Y',
+    'Ÿ' => 'Y',
+    'Ź' => 'Z',
+    'Ž' => 'Z',
+    'Ż' => 'Z',
+    'Þ' => 'Th', // replacement is two ASCII letters, not one
+];