112 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			PHP
		
	
	
		
		
			
		
	
	
			112 lines
		
	
	
		
			3.5 KiB
		
	
	
	
		
			PHP
		
	
	
| 
								 | 
							
								<?php
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
							 | 
						||
| 
								 | 
							
								 * Class that handles operations involving percent-encoding in URIs.
							 | 
						||
| 
								 | 
							
								 *
							 | 
						||
| 
								 | 
							
								 * @warning
							 | 
						||
| 
								 | 
							
								 *      Be careful when reusing instances of PercentEncoder. The object
							 | 
						||
| 
								 | 
							
								 *      you use for normalize() SHOULD NOT be used for encode(), or
							 | 
						||
| 
								 | 
							
								 *      vice-versa.
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								class HTMLPurifier_PercentEncoder
								{

								    /**
								     * Lookup table of byte values that encode() leaves untouched and that
								     * normalize() decodes back to their literal character. Keys are byte
								     * values (0-255); the value is always true.
								     * @type array
								     */
								    protected $preserve = array();

								    /**
								     * Builds the lookup table of preserved bytes: the unreserved set
								     * (ASCII digits, ASCII letters, "-", ".", "_", "~") plus every byte
								     * of the optional $preserve string.
								     * @param string|false $preserve Extra characters to leave unescaped,
								     *      or false to preserve only the unreserved set.
								     */
								    public function __construct($preserve = false)
								    {
								        // Byte values of the unreserved characters, in the order they are
								        // registered: digits, upper-case, lower-case, then - . _ ~
								        $unreserved = array_merge(
								            range(48, 57),
								            range(65, 90),
								            range(97, 122),
								            array(45, 46, 95, 126)
								        );
								        foreach ($unreserved as $code) {
								            $this->preserve[$code] = true;
								        }

								        // No caller-supplied extras: the unreserved set is all we keep.
								        if ($preserve === false) {
								            return;
								        }

								        // Register every byte of the caller-supplied string as preserved.
								        $count = strlen($preserve);
								        for ($pos = 0; $pos < $count; $pos++) {
								            $this->preserve[ord($preserve[$pos])] = true;
								        }
								    }

								    /**
								     * Replacement for urlencode(): percent-encodes every byte of the input
								     * that is neither in the preserve table nor a literal percent sign.
								     * @note
								     *      The input is expected to be normalized already, so that every
								     *      percent sign in it starts a valid escape sequence. Percent signs
								     *      are always copied through as-is, whether or not '%' is in
								     *      $preserve.
								     * @param string $string String to be encoded
								     * @return string Encoded string.
								     */
								    public function encode($string)
								    {
								        $encoded = '';
								        $total = strlen($string);
								        for ($pos = 0; $pos < $total; $pos++) {
								            $char = $string[$pos];
								            if ($char !== '%') {
								                $code = ord($char);
								                if (!isset($this->preserve[$code])) {
								                    // Not preserved: emit as %XX with upper-case hex digits.
								                    $encoded .= sprintf('%%%02X', $code);
								                    continue;
								                }
								            }
								            // Percent signs and preserved bytes are copied verbatim.
								            $encoded .= $char;
								        }
								        return $encoded;
								    }

								    /**
								     * Repairs and canonicalizes the percent-encoding of a string:
								     * malformed escapes get their percent sign escaped as %25, escapes of
								     * preserved bytes are decoded, and all other escapes are rewritten
								     * with upper-case hex digits.
								     * @warning The set of decoded bytes comes from the same $preserve table
								     *          used by encode(), so extra characters passed to the
								     *          constructor are decoded here too. Be careful when reusing
								     *          instances of PercentEncoder!
								     * @param string $string String to normalize
								     * @return string
								     */
								    public function normalize($string)
								    {
								        if ($string == '') {
								            return '';
								        }

								        // Everything before the first '%' is passed through unchanged; each
								        // remaining segment is the text that followed one percent sign.
								        $segments = explode('%', $string);
								        $result = array_shift($segments);

								        foreach ($segments as $segment) {
								            $hex = substr($segment, 0, 2);

								            // Too short or not two hex digits: the percent sign was not a
								            // valid escape, so escape the percent sign itself.
								            if (strlen($segment) < 2 || !ctype_xdigit($hex)) {
								                $result .= '%25' . $segment;
								                continue;
								            }

								            $tail = substr($segment, 2);
								            $code = hexdec($hex);

								            if (isset($this->preserve[$code])) {
								                // Escape of a preserved byte: decode it to the literal byte.
								                $result .= chr($code) . $tail;
								            } else {
								                // Keep the escape, but with upper-case hex digits.
								                $result .= '%' . strtoupper($hex) . $tail;
								            }
								        }

								        return $result;
								    }
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// vim: et sw=4 sts=4
							 |