231 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			PHP
		
	
	
		
		
			
		
	
	
			231 lines
		
	
	
		
			7.2 KiB
		
	
	
	
		
			PHP
		
	
	
| 
								 | 
							
								<?php
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Base class for a set of proprietary modules that clean up (tidy)
								 * poorly written HTML.
								 *
								 * Subclasses are expected to override makeFixes() to supply their fixes;
								 * setup() then decides which of those fixes are activated and hands them
								 * to populate().
								 *
								 * Fix names have the form `[element][@attr][#property]`, as parsed by
								 * getFixType().
								 *
								 * @todo Figure out how to protect some of these methods/properties
								 */
								class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
								{
								    /**
								     * List of supported levels, in increasing order of strength.
								     * Index zero is a special case "no fixes" level.
								     * @type array
								     */
								    public $levels = array(0 => 'none', 'light', 'medium', 'heavy');

								    /**
								     * Default level to place all fixes in.
								     * Disabled by default (null), in which case makeFixesForLevel()
								     * leaves $fixesForLevel untouched.
								     * @type string
								     */
								    public $defaultLevel = null;

								    /**
								     * Lists of fixes used by getFixesForLevel().
								     * Format is:
								     *      HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
								     * @type array
								     */
								    public $fixesForLevel = array(
								        'light' => array(),
								        'medium' => array(),
								        'heavy' => array()
								    );

								    /**
								     * Lazy load constructs the module by determining the necessary
								     * fixes to create and then delegating to the populate() function.
								     *
								     * A fix survives when it is not listed in HTML.TidyRemove and is
								     * either listed in HTML.TidyAdd or belongs to the level selected by
								     * HTML.TidyLevel (or any level below it).
								     *
								     * @param HTMLPurifier_Config $config
								     * @todo Wildcard matching and error reporting when an added or
								     *       subtracted fix has no effect.
								     */
								    public function setup($config)
								    {
								        // create fixes, initialize fixesForLevel
								        $fixes = $this->makeFixes();
								        if (is_null($fixes)) {
								            // an override that forgets to return anything is treated as
								            // "no fixes" instead of tripping foreach()/array_keys() below
								            $fixes = array();
								        }
								        $this->makeFixesForLevel($fixes);

								        // figure out which fixes to use
								        $level = $config->get('HTML.TidyLevel');
								        $fixes_lookup = $this->getFixesForLevel($level);

								        // get custom fix declarations: these need namespace processing
								        $add_fixes = $config->get('HTML.TidyAdd');
								        $remove_fixes = $config->get('HTML.TidyRemove');

								        foreach ($fixes as $name => $fix) {
								            // needs to be refactored a little to implement globbing
								            $removed = isset($remove_fixes[$name]);
								            $wanted = isset($add_fixes[$name]) || isset($fixes_lookup[$name]);
								            if ($removed || !$wanted) {
								                unset($fixes[$name]);
								            }
								        }

								        // populate this module with necessary fixes
								        $this->populate($fixes);
								    }

								    /**
								     * Retrieves all fixes per a level, returning fixes for that specific
								     * level as well as all levels below it.
								     *
								     * The "no fixes" level ($levels[0]) yields an empty table. An
								     * unknown level raises an E_USER_WARNING and also yields an empty
								     * table.
								     *
								     * @param string $level level identifier, see $levels for valid values
								     * @return array Lookup table of fixes (fix name => true)
								     */
								    public function getFixesForLevel($level)
								    {
								        // loose comparisons are kept on purpose so that level matching
								        // behaves exactly as it always has for existing callers
								        if ($level == $this->levels[0]) {
								            return array();
								        }

								        // collect every level up to and including the requested one
								        $activated_levels = array();
								        $matched = false;
								        for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
								            $activated_levels[] = $this->levels[$i];
								            if ($this->levels[$i] == $level) {
								                $matched = true;
								                break;
								            }
								        }
								        if (!$matched) {
								            trigger_error(
								                // cast: htmlspecialchars() rejects null on newer PHP versions
								                'Tidy level ' . htmlspecialchars((string)$level) . ' not recognized',
								                E_USER_WARNING
								            );
								            return array();
								        }

								        $ret = array();
								        // a separate loop variable keeps the $level argument intact
								        foreach ($activated_levels as $activated) {
								            if (!isset($this->fixesForLevel[$activated])) {
								                // level declared in $levels but without a fix list
								                continue;
								            }
								            foreach ($this->fixesForLevel[$activated] as $fix) {
								                $ret[$fix] = true;
								            }
								        }
								        return $ret;
								    }

								    /**
								     * Dynamically populates the $fixesForLevel member variable using
								     * the fixes array. It may be custom overloaded, used in conjunction
								     * with $defaultLevel, or not used at all.
								     *
								     * When $defaultLevel is set, every fix name (the keys of $fixes) is
								     * assigned to that level. When $defaultLevel names a level missing
								     * from $fixesForLevel, an E_USER_ERROR is raised instead.
								     *
								     * @param array $fixes Associative array of fix name to fix implementation
								     */
								    public function makeFixesForLevel($fixes)
								    {
								        if (!isset($this->defaultLevel)) {
								            return;
								        }
								        if (!isset($this->fixesForLevel[$this->defaultLevel])) {
								            trigger_error(
								                'Default level ' . $this->defaultLevel . ' does not exist',
								                E_USER_ERROR
								            );
								            return;
								        }
								        $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
								    }

								    /**
								     * Populates the module with transforms and other special-case code
								     * based on a list of fixes passed to it.
								     *
								     * Depending on the type reported by getFixType():
								     *  - attr_transform_pre / attr_transform_post: stored under the
								     *    attribute name, either on the named element or, when no element
								     *    is given, on this module's info_attr_transform_* property;
								     *  - tag_transform: stored in $this->info_tag_transform by element;
								     *  - child / content_model_type: assigned to the property of that
								     *    name on the named element.
								     * Elements not yet present in $this->info are created with
								     * addBlankElement(). Any other type raises an E_USER_ERROR.
								     *
								     * @param array $fixes Lookup table of fixes to activate
								     */
								    public function populate($fixes)
								    {
								        foreach ($fixes as $name => $fix) {
								            // determine what the fix is for
								            list($type, $params) = $this->getFixType($name);
								            switch ($type) {
								                case 'attr_transform_pre':
								                case 'attr_transform_post':
								                    $attr = $params['attr'];
								                    if (isset($params['element'])) {
								                        $element = $params['element'];
								                        if (empty($this->info[$element])) {
								                            $e = $this->addBlankElement($element);
								                        } else {
								                            $e = $this->info[$element];
								                        }
								                    } else {
								                        // module-wide transform: lives on $this->info_attr_transform_*
								                        $type = "info_$type";
								                        $e = $this;
								                    }
								                    // PHP does some weird parsing when I do
								                    // $e->$type[$attr], so I have to assign a ref.
								                    $f =& $e->$type;
								                    $f[$attr] = $fix;
								                    break;
								                case 'tag_transform':
								                    $this->info_tag_transform[$params['element']] = $fix;
								                    break;
								                case 'child':
								                case 'content_model_type':
								                    $element = $params['element'];
								                    if (empty($this->info[$element])) {
								                        $e = $this->addBlankElement($element);
								                    } else {
								                        $e = $this->info[$element];
								                    }
								                    $e->$type = $fix;
								                    break;
								                default:
								                    trigger_error("Fix type $type not supported", E_USER_ERROR);
								                    break;
								            }
								        }
								    }

								    /**
								     * Parses a fix name and determines what kind of fix it is, as well
								     * as other information defined by the fix.
								     *
								     * The name is split at '#' into a name and a property, and the name
								     * is then split at '@' into an element and an attribute. If an
								     * attribute is present the type is 'attr_transform_' . $property
								     * (property defaults to 'pre'); otherwise, with no property the type
								     * is 'tag_transform'; otherwise the type is the property itself.
								     *
								     * @param $name String name of fix
								     * @return array(string $fix_type, array $fix_parameters)
								     * @note $fix_parameters is type dependant, see populate() for usage
								     *       of these parameters; it may hold the keys 'element' and 'attr'
								     */
								    public function getFixType($name)
								    {
								        // parse it
								        $property = $attr = null;
								        if (strpos($name, '#') !== false) {
								            list($name, $property) = explode('#', $name);
								        }
								        if (strpos($name, '@') !== false) {
								            list($name, $attr) = explode('@', $name);
								        }

								        // figure out the parameters
								        $params = array();
								        if ($name !== '') {
								            $params['element'] = $name;
								        }
								        if (!is_null($attr)) {
								            $params['attr'] = $attr;
								        }

								        // special case: attribute transform
								        if (!is_null($attr)) {
								            if (is_null($property)) {
								                $property = 'pre';
								            }
								            $type = 'attr_transform_' . $property;
								            return array($type, $params);
								        }

								        // special case: tag transform
								        if (is_null($property)) {
								            return array('tag_transform', $params);
								        }

								        return array($property, $params);
								    }

								    /**
								     * Defines all fixes the module will perform in a compact
								     * associative array of fix name to fix implementation.
								     *
								     * This base implementation defines no fixes; subclasses override it.
								     *
								     * @return array
								     */
								    public function makeFixes()
								    {
								        // return an actual array so callers can iterate and array_keys() it
								        return array();
								    }
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// vim: et sw=4 sts=4
							 |