HTML entity encoding everything in PHP

The standard htmlentities() function encodes special characters so that they display correctly in the browser. However, sometimes you also want to encode characters from other languages as entities. I just found a script that does exactly that, in the form of an HTML-encoding application. The source is browsable but a little impenetrable, so I tidied it up:

/* ***** Text to UTF-8 or HTML Entities tool ***************
* Copyright (c) 2006 - Brian Huisman (GreyWyvern)
* 
* This script is licenced under the BSD licence:
*   http://www.greywyvern.com/code/bsd
*************************************************************** */

/**
 * Converts UTF-8 text into a displayable listing of HTML entities.
 *
 * The output of toHtml() is escaped twice on purpose: every entity is
 * emitted with its leading "&" written as "&amp;" (for example "&amp;#3585;"
 * or "&amp;eacute;"), so that a browser rendering the result shows the
 * entity source ("&#3585;") rather than the character itself.
 */
class HtmlEnc
{
    /**
     * Returns the code point encoded by the first UTF-8 sequence in $c.
     *
     * Sequences of one to six bytes are decoded (the five- and six-byte
     * forms are obsolete but were accepted by the original implementation).
     *
     * @param string $c String starting with the UTF-8 sequence to decode.
     *
     * @return int|false The code point, or false when $c is empty, starts
     *                   with a byte that cannot begin a sequence (0x80-0xBF,
     *                   0xFE, 0xFF), or the sequence is truncated/malformed.
     */
    public static function uniord($c)
    {
        $c = (string) $c;
        if ($c === '') {
            return false;
        }

        $lead = ord($c[0]);
        if ($lead <= 127) {
            return $lead; // plain ASCII
        }
        if ($lead <= 191 || $lead >= 254) {
            return false; // stray continuation byte, or 0xFE/0xFF
        }

        // The lead byte fixes how many continuation bytes follow and how
        // many payload bits it carries itself.
        if ($lead <= 223) {
            $extra = 1;
            $ud = $lead - 192;
        } elseif ($lead <= 239) {
            $extra = 2;
            $ud = $lead - 224;
        } elseif ($lead <= 247) {
            $extra = 3;
            $ud = $lead - 240;
        } elseif ($lead <= 251) {
            $extra = 4;
            $ud = $lead - 248;
        } else {
            $extra = 5;
            $ud = $lead - 252;
        }

        if (strlen($c) <= $extra) {
            return false; // sequence is cut short
        }

        // Each continuation byte contributes its low six bits.
        for ($i = 1; $i <= $extra; $i++) {
            $byte = ord($c[$i]);
            if (($byte & 0xC0) !== 0x80) {
                return false; // not a continuation byte
            }
            $ud = $ud * 64 + ($byte - 128);
        }

        return $ud;
    }

    /**
     * Encodes a UTF-8 string as double-escaped HTML entities.
     *
     * - ASCII characters pass through, except that &, <, > and " are
     *   escaped twice ("&" becomes "&amp;amp;").
     * - U+00A0..U+00FF become their named entity with the ampersand
     *   escaped ("&amp;eacute;").
     * - Every other character becomes a decimal numeric reference with
     *   the ampersand escaped ("&amp;#3585;").
     *
     * Newlines and other characters that "." does not match are preserved.
     *
     * @param string $str Text to encode; must be valid UTF-8.
     *
     * @return string The encoded text. An empty string is returned (and an
     *                E_USER_WARNING raised) when $str is not valid UTF-8.
     */
    public static function toHtml($str)
    {
        $str = (string) $str;
        if ($str === '') {
            return '';
        }

        // Split once into code points instead of peeling one character off
        // the front with a regex per iteration.
        $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            trigger_error('HtmlEnc::toHtml() expects valid UTF-8 input', E_USER_WARNING);
            return '';
        }

        $html_str = '';
        foreach ($chars as $char) {
            // preg_split() with /u only yields well-formed sequences, so
            // uniord() does not return false here.
            $code = self::uniord($char);

            if ($code < 128 || ($code >= 160 && $code <= 255)) {
                // Work on the single-byte Latin-1 form with an explicit
                // charset and flags: the htmlentities() defaults changed in
                // PHP 5.4 (charset) and PHP 8.1 (flags).
                $named = htmlentities(chr($code), ENT_COMPAT, 'ISO-8859-1');
                $html_str .= htmlentities($named, ENT_COMPAT, 'ISO-8859-1');
            } else {
                // No Latin-1 named entity exists: emit a numeric reference.
                $html_str .= '&amp;#' . $code . ';';
            }
        }

        return $html_str;
    }
}

// Example: encode a mixed ASCII/Thai string and print the result.
print HtmlEnc::toHtml("hello กรุณาเลือกประเทศ");

Enjoy!

(Note: The string to be entity-encoded must be valid UTF-8; convert it first if it is in another encoding.)

Related Posts

Tags: ,