UnihanTag

<?php //{{MediaWikiExtension}}<source lang="php">/* * UnihanTag.php - A MediaWiki tag extension for adding <unihan> sections to a page. * @author Kenneth L. Root * @version 1.0.0 * @copyright Copyright (C) 2007 Kenneth L. Root * @license GNU General Public License (https://www.gnu.org/licenses/gpl.txt) * ----------------------------------------------------------------------- * Description: *     This is a MediaWiki extension which adds a <unihan> tag to the parser *     to allow inserting data about one or more Chinese characters into a *     page. * * Installation: *     1. Place this directory (UnihanTag) under $IP/extensions *     2. Download Unihan database at: *         https://unicode.org/Public/UNIDATA/Unihan.txt *     3. Convert the Unihan text file into a CDB with: *        php makedb *     4. Enable the extension by adding this line to your LocalSettings.php: *        require_once('extensions/UnihanTag/UnihanTag.php'); * * Usage: *     Once installed, you may utilize UnihanTag by adding the <unihan> tag to articles: *     <unihan add="mandarin" contours="yes">老师</unihan> * * Version Notes: *     version 1.0.0: *     Initial release. * ----------------------------------------------------------------------- * Copyright (c) 2007 Kenneth L. Root * *    This program is free software: you can redistribute it and/or modify *    it under the terms of the GNU General Public License as published by *    the Free Software Foundation, either version 3 of the License, or *    (at your option) any later version. * *    This program is distributed in the hope that it will be useful, *    but WITHOUT ANY WARRANTY; without even the implied warranty of *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the *    GNU General Public License for more details. * *    You should have received a copy of the GNU General Public License *    along with this program.  If not, see <https://www.gnu.org/licenses/>. 
* ----------------------------------------------------------------------- */
// This file only makes sense when loaded by MediaWiki; bail out on direct access.
defined('MEDIAWIKI') or die( "This file is part of MediaWiki and is not a valid entry point\n" );

// Describe the extension under the "parserhook" credits group.
$wgExtensionCredits['parserhook'][] = array(
  'name' => 'UnihanTag',
  'version' => '1.0.0',
  'author' => '[https://the-b.org/ Kenny Root]',
  'url' => 'https://the-b.org/UnihanTag',
  'description' => 'Tag to add data from the Unihan database to Chinese characters.',
);

// Queue the setup callback defined below.
$wgExtensionFunctions[] = 'unihanTag_init';
/**
 * Extension setup callback: registers the <unihan> parser hook, adds the
 * extension's system messages and opens the Unihan database.
 */
function unihanTag_init() {
  global $wgParser, $wgMessageCache;

  $wgParser->setHook( 'unihan', 'unihanTag_render' );

  // Message key => default text, registered in this order.
  $messages = array(
    'unihantag-missing-add' => 'Error: <unihan> tag must contain a "add" attribute.',
    'unihantag-no-db' => 'Error: No Unihan database!',
  );
  foreach ($messages as $messageKey => $messageText) {
    $wgMessageCache->addMessage( $messageKey, $messageText );
  }

  unihanTag_openDb();
}
/**
 * Opens the Unihan CDB file located next to this script and stores the
 * handle in the global $unihanTag_db.
 *
 * The global is set to false when the dba extension is not loaded, the file
 * is not readable, or dba_popen() fails, so callers can test the handle for
 * truthiness before using it.
 */
function unihanTag_openDb() {
  global $unihanTag_db;

  $unihanTag_db = false;

  // dirname() copes with both "/" and "\" separators, unlike searching for
  // the last "/" by hand.
  $dbPath = dirname(__FILE__) . '/Unihan.cdb';

  // Avoid a fatal error without the dba extension and a warning on every
  // request when the database has not been generated yet.
  if (function_exists('dba_popen') && is_readable($dbPath)) {
    $unihanTag_db = dba_popen($dbPath, "r", "cdb");
  }
}
/**
 * Looks up one Unihan field for each code point in the open database.
 *
 * @param array $codepoints Unicode code points (integers).
 * @param string $field Key suffix used in the CDB, e.g. "mandarin".
 * @return array Readings found, in input order; code points without an
 *   entry are skipped.
 */
function unihanTag_fetchReadings( $codepoints, $field ) {
  global $unihanTag_db;

  $readings = array();
  foreach ($codepoints as $codepoint) {
    // Keys are "<hex code point>-<field>".
    $value = dba_fetch(dechex($codepoint) . '-' . $field, $unihanTag_db);
    if ($value !== false && $value !== '') {
      $readings[] = $value;
    }
  }
  return $readings;
}

/**
 * Parser hook callback for the <unihan> tag.
 *
 * Recognised attributes:
 *  - add:      required; list of readings to append ("mandarin", "cantonese"),
 *              separated by commas and/or whitespace.
 *  - original: when "no", the tag content itself is not echoed.
 *  - contours: when "yes", Mandarin readings are converted from tone numbers
 *              to tone marks.
 *
 * @param string $text Content between <unihan> and </unihan>.
 * @param array $params Tag attributes.
 * @param Parser $parser Parser instance (unused).
 * @return string HTML-escaped output, or an error box.
 */
function unihanTag_render( $text, $params = array(), &$parser ) {
  global $unihanTag_db;

  # Short-circuit with error message if content is not specified.
  # The message text contains a literal "<unihan>", so it must be escaped
  # before being emitted as HTML.
  if (!isset($params['add'])) {
    return
      '<div class="errorbox">'.
      htmlspecialchars(wfMsgForContent('unihantag-missing-add'), ENT_QUOTES, 'UTF-8').
      '</div>';
  }

  if (!$unihanTag_db) {
    return
      '<div class="errorbox">'.
      htmlspecialchars(wfMsgForContent('unihantag-no-db'), ENT_QUOTES, 'UTF-8').
      '</div>';
  }

  # A self-closing tag hands us null; normalise to a string.
  $text = (string)$text;
  $unicode = unihanTag_utf8ToUnicode($text);

  # Accept "a,b", "a, b" and "a b" alike.
  $keys = preg_split('/[\s,]+/', $params['add'], -1, PREG_SPLIT_NO_EMPTY);

  # Always defined, even when original="no" and nothing gets appended.
  $output = '';
  if (!array_key_exists('original', $params) || $params['original'] != "no") {
    $output = $text;
  }

  $want_contours = array_key_exists('contours', $params) && $params['contours'] == 'yes';

  $add = array();

  if (in_array('mandarin', $keys, true)) {
    $mandarin = unihanTag_fetchReadings($unicode, 'mandarin');
    if ($want_contours) {
      $mandarin = array_map('unihanTag_pinyinContour', $mandarin);
    }
    if ($mandarin) {
      $add[] = "Pinyin: " . join(" ", $mandarin);
    }
  }

  if (in_array('cantonese', $keys, true)) {
    $cantonese = unihanTag_fetchReadings($unicode, 'cantonese');
    if ($cantonese) {
      $add[] = "Jyutping: " . join(" ", $cantonese);
    }
  }

  if ($add) {
    $to_add = join(", ", $add);
    $output = ($output !== '') ? $output . " (" . $to_add . ")" : $to_add;
  }

  # Tag hook output is inserted into the page as HTML; the tag content is
  # user-supplied, so escape everything we return.
  return htmlspecialchars($output, ENT_QUOTES, 'UTF-8');
}
/**
 * Converts a numbered pinyin syllable (e.g. "lao3") into its tone-marked
 * form (e.g. "lǎo").
 *
 * Placement of the mark: on "a" or "e" when present, otherwise on the "o"
 * of "ou", otherwise on the last vowel of the syllable.
 *
 * @param string|false $pinyin Syllable ending in a tone digit 1-5.
 * @return string The tone-marked syllable. The input is returned unchanged
 *   (cast to string) when it has no trailing tone digit, contains no vowel,
 *   or the contour table has no matching entry.
 */
function unihanTag_pinyinContour($pinyin) {
  global $unihanTag_pinyinContours;

  $pinyin = (string)$pinyin;
  if (!preg_match('/^(.+)([1-5])\z/s', $pinyin, $parts)) {
    return $pinyin;
  }
  $sound = $parts[1];
  $tone = (int)$parts[2];

  // Compare against false explicitly: a vowel at offset 0 ("ai", "ou", "an")
  // is a valid match.
  $pos = false;
  foreach (array('a', 'e', 'ou') as $needle) {
    $pos = stripos($sound, $needle);
    if ($pos !== false) {
      break;
    }
  }

  if ($pos !== false) {
    $vowel = $sound[$pos];
  } elseif (preg_match_all('/[aeiouü]/iu', $sound, $found, PREG_OFFSET_CAPTURE)) {
    // Fall back to the last vowel. The /u modifier keeps the two-byte "ü"
    // together; offsets are still reported in bytes.
    $last = $found[0][count($found[0]) - 1];
    $vowel = $last[0];
    $pos = $last[1];
  } else {
    return $pinyin;
  }

  // Table keys are lower case. Only ASCII vowels go through strtolower();
  // the sole multi-byte vowel matched above is "ü"/"Ü".
  $key = (strlen($vowel) === 1) ? strtolower($vowel) : 'ü';
  if (!isset($unihanTag_pinyinContours[$key][$tone])) {
    return $pinyin;
  }

  $marked = $unihanTag_pinyinContours[$key][$tone];
  if ($vowel !== $key && function_exists('mb_strtoupper')) {
    // Keep an upper-case vowel upper-case when mbstring is available.
    $marked = mb_strtoupper($marked, 'UTF-8');
  }

  return substr($sound, 0, $pos) . $marked . substr($sound, $pos + strlen($vowel));
}

// Vowel => tone number => tone-marked vowel (tone 5 is the unmarked neutral tone).
// Assigned through $GLOBALS so the table lands in the global scope even when
// this file is included from inside a function.
$GLOBALS['unihanTag_pinyinContours'] = array(
  'a' => array(
    1 => 'ā',
    2 => 'á',
    3 => 'ǎ',
    4 => 'à',
    5 => 'a',
  ),
  'e' => array(
    1 => 'ē',
    2 => 'é',
    3 => 'ě',
    4 => 'è',
    5 => 'e',
  ),
  'i' => array(
    1 => 'ī',
    2 => 'í',
    3 => 'ǐ',
    4 => 'ì',
    5 => 'i',
  ),
  'o' => array(
    1 => 'ō',
    2 => 'ó',
    3 => 'ǒ',
    4 => 'ò',
    5 => 'o',
  ),
  'u' => array(
    1 => 'ū',
    2 => 'ú',
    3 => 'ǔ',
    4 => 'ù',
    5 => 'u',
  ),
  'ü' => array(
    1 => 'ǖ',
    2 => 'ǘ',
    3 => 'ǚ',
    4 => 'ǜ',
    5 => 'ü',
  ),
);

/**
 * Placeholder for a Jyutping-to-Yale romanisation converter.
 *
 * Not implemented yet: the argument is ignored and null is returned.
 */
function unihanTag_jyutpingToYale($syllable) {
  // TODO
  return null;
}
/**
 * Decodes a UTF-8 string into a list of Unicode code points.
 *
 * Handles 1- to 4-byte sequences, so characters outside the Basic
 * Multilingual Plane (e.g. CJK Extension B) decode to a single code point.
 * Bytes that cannot start a sequence, and sequences that are truncated or
 * contain a non-continuation byte, are skipped one byte at a time so that
 * decoding resynchronises on the next valid character. Overlong encodings
 * and surrogate values are not rejected.
 *
 * @param string $str UTF-8 encoded text.
 * @return array Code points (integers) in input order.
 */
function unihanTag_utf8ToUnicode( $str ) {
  $str = (string)$str;
  $length = strlen( $str );
  $unicode = array();
  $i = 0;

  while ( $i < $length ) {
    $lead = ord( $str[ $i ] );

    if ( $lead < 0x80 ) {
      // Plain ASCII.
      $unicode[] = $lead;
      $i++;
      continue;
    }

    // Work out how many continuation bytes follow and the payload bits of
    // the lead byte.
    if ( $lead >= 0xC0 && $lead < 0xE0 ) {
      $extra = 1;
      $number = $lead & 0x1F;
    } elseif ( $lead >= 0xE0 && $lead < 0xF0 ) {
      $extra = 2;
      $number = $lead & 0x0F;
    } elseif ( $lead >= 0xF0 && $lead < 0xF8 ) {
      $extra = 3;
      $number = $lead & 0x07;
    } else {
      // Stray continuation byte or invalid lead byte.
      $i++;
      continue;
    }

    $valid = true;
    for ( $k = 1; $k <= $extra; $k++ ) {
      if ( $i + $k >= $length ) {
        $valid = false; // truncated sequence
        break;
      }
      $byte = ord( $str[ $i + $k ] );
      if ( ( $byte & 0xC0 ) !== 0x80 ) {
        $valid = false; // not a continuation byte
        break;
      }
      $number = ( $number << 6 ) | ( $byte & 0x3F );
    }

    if ( $valid ) {
      $unicode[] = $number;
      $i += $extra + 1;
    } else {
      $i++;
    }
  }

  return $unicode;
}//</source>

Kenny Root

Copyright © Kenny Root. All rights reserved.