208 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			PHP
		
	
	
		
		
			
		
	
	
			208 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			PHP
		
	
	
| 
								 | 
							
								<?php
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
								 * Strategy that filters a token list down to what the HTML definition
								 * recognizes.
								 *
								 * Each token is visited once, in order. A tag that has a TagTransform
								 * registered is transformed before anything else is decided about it. A tag
								 * that is still unknown to the definition afterwards is either rendered back
								 * into a text token (Core.EscapeInvalidTags) or left out of the result.
								 * Comments are turned into text, cleaned up, or left out depending on the
								 * configuration and on whether they sit inside a hidden element.
								 */
								class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
								{

								    /**
								     * Builds the filtered token list.
								     *
								     * While the loop runs, the token being examined is exposed through the
								     * context under the name 'CurrentToken'; the entry is destroyed again
								     * before returning.
								     *
								     * @param HTMLPurifier_Token[] $tokens
								     * @param HTMLPurifier_Config $config
								     * @param HTMLPurifier_Context $context
								     * @return array|HTMLPurifier_Token[]
								     */
								    public function execute($tokens, $config, $context)
								    {
								        $html_def = $config->getHTMLDefinition();
								        $html_generator = new HTMLPurifier_Generator($config, $context);
								        $kept = array();

								        $escape_foreign = $config->get('Core.EscapeInvalidTags');
								        $drop_invalid_img = $config->get('Core.RemoveInvalidImg');

								        // the comment-related settings below only decide whether a comment
								        // token survives
								        $is_trusted = $config->get('HTML.Trusted');
								        $allowed_comments = $config->get('HTML.AllowedComments');
								        $allowed_comment_pattern = $config->get('HTML.AllowedCommentsRegexp');
								        $filter_comments = $allowed_comments !== array() || $allowed_comment_pattern !== null;

								        $strip_script_contents = $config->get('Core.RemoveScriptContents');
								        $hidden = $config->get('Core.HiddenElements');

								        // Core.RemoveScriptContents compatibility: an explicit true/false
								        // overrides whatever Core.HiddenElements says about 'script'; any
								        // other value leaves the lookup untouched
								        if ($strip_script_contents === true) {
								            $hidden['script'] = true;
								        } elseif ($strip_script_contents === false) {
								            if (isset($hidden['script'])) {
								                unset($hidden['script']);
								            }
								        }

								        $validator = new HTMLPurifier_AttrValidator();

								        // tag name whose matching tag ends the current "drop everything"
								        // stretch, or false when no such stretch is active
								        $skip_until = false;

								        // name of the hidden element we are currently inside of (comments
								        // become text tokens there), or false
								        $textify_inside = false;

								        // $token is handed to the context by reference, so every later
								        // assignment to it is what the context sees as the current token
								        $token = false;
								        $context->register('CurrentToken', $token);

								        $collector = false;
								        if ($config->get('Core.CollectErrors')) {
								            $collector =& $context->get('ErrorCollector');
								        }

								        foreach ($tokens as $token) {
								            // inside a dropped hidden element: ignore everything except
								            // tags that carry that element's name
								            if ($skip_until && (empty($token->is_tag) || $token->name !== $skip_until)) {
								                continue;
								            }

								            if (!empty($token->is_tag)) {
								                // DEFINITION CALL
								                // a registered transformation gets first go at the tag
								                $source_name = $token->name;
								                if (isset($html_def->info_tag_transform[$source_name])) {
								                    $token = $html_def->info_tag_transform[$source_name]
								                        ->transform($token, $config, $context);
								                    if ($collector) {
								                        $collector->send(
								                            E_NOTICE,
								                            'Strategy_RemoveForeignElements: Tag transform',
								                            $source_name
								                        );
								                    }
								                }

								                if (isset($html_def->info[$token->name])) {
								                    // known element; what is left to verify is that its
								                    // required attributes survive validation
								                    $opens_element = $token instanceof HTMLPurifier_Token_Start
								                        || $token instanceof HTMLPurifier_Token_Empty;
								                    if ($opens_element
								                        && $html_def->info[$token->name]->required_attr
								                        // img is only checked when Core.RemoveInvalidImg is on
								                        && ($token->name != 'img' || $drop_invalid_img)
								                    ) {
								                        $validator->validateToken($token, $config, $context);
								                        $incomplete = false;
								                        foreach ($html_def->info[$token->name]->required_attr as $required) {
								                            if (isset($token->attr[$required])) {
								                                continue;
								                            }
								                            // $required stays set to the first missing name
								                            $incomplete = true;
								                            break;
								                        }
								                        if ($incomplete) {
								                            if ($collector) {
								                                $collector->send(
								                                    E_ERROR,
								                                    'Strategy_RemoveForeignElements: Missing required attribute',
								                                    $required
								                                );
								                            }
								                            continue;
								                        }
								                        // mark the token so its attributes are flagged as
								                        // already validated
								                        $token->armor['ValidateAttributes'] = true;
								                    }

								                    // track entering/leaving an allowed hidden element so
								                    // that comments inside it can be turned into text
								                    if ($token instanceof HTMLPurifier_Token_Start && isset($hidden[$token->name])) {
								                        $textify_inside = $token->name;
								                    } elseif ($token instanceof HTMLPurifier_Token_End && $token->name === $textify_inside) {
								                        $textify_inside = false;
								                    }
								                } elseif ($escape_foreign) {
								                    // unknown tag: keep it, but as its generated HTML text
								                    if ($collector) {
								                        $collector->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
								                    }
								                    $token = new HTMLPurifier_Token_Text(
								                        $html_generator->generateFromToken($token)
								                    );
								                } else {
								                    // unknown tag that gets dropped; for hidden elements the
								                    // children are dropped as well
								                    // CAN BE GENERICIZED
								                    $message = 'Strategy_RemoveForeignElements: Foreign element removed';
								                    if (isset($hidden[$token->name])) {
								                        $message = 'Strategy_RemoveForeignElements: Foreign meta element removed';
								                        if ($token instanceof HTMLPurifier_Token_Start) {
								                            $skip_until = $token->name;
								                        } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
								                            // anything that is neither start nor empty tag
								                            // ends the skipping; an empty tag changes nothing
								                            $skip_until = false;
								                        }
								                    }
								                    if ($collector) {
								                        $collector->send(E_ERROR, $message);
								                    }
								                    continue;
								                }
								            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
								                if ($textify_inside !== false) {
								                    // comment inside an allowed hidden element: keep its
								                    // data as plain text
								                    $token = new HTMLPurifier_Token_Text($token->data);
								                } else {
								                    $keep_comment = false;
								                    $trailing_hyphen = false;
								                    $found_double_hyphen = false;

								                    if ($is_trusted || $filter_comments) {
								                        // the trailing-hyphen flag only feeds error
								                        // reporting, so it is computed only when collecting
								                        $trailing_hyphen = $collector && substr($token->data, -1) === '-';

								                        // strip trailing hyphens, then collapse every run of
								                        // hyphens down to a single one
								                        $cleaned = rtrim($token->data, '-');
								                        $found_double_hyphen = strpos($cleaned, '--') !== false;
								                        while (strpos($cleaned, '--') !== false) {
								                            $cleaned = str_replace('--', '-', $cleaned);
								                        }
								                        $token->data = $cleaned;

								                        $comment_text = trim($cleaned);
								                        $keep_comment = $is_trusted
								                            || !empty($allowed_comments[$comment_text])
								                            || ($allowed_comment_pattern !== null
								                                && preg_match($allowed_comment_pattern, $comment_text));
								                    }

								                    if (!$keep_comment) {
								                        // strip comments
								                        if ($collector) {
								                            $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
								                        }
								                        continue;
								                    }

								                    // OK good
								                    if ($collector) {
								                        if ($trailing_hyphen) {
								                            $collector->send(
								                                E_NOTICE,
								                                'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
								                            );
								                        }
								                        if ($found_double_hyphen) {
								                            $collector->send(
								                                E_NOTICE,
								                                'Strategy_RemoveForeignElements: Hyphens in comment collapsed'
								                            );
								                        }
								                    }
								                }
								            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
								                // neither tag, comment nor text: not carried over
								                continue;
								            }

								            $kept[] = $token;
								        }

								        if ($skip_until && $collector) {
								            // the skipping never found its closing tag, so tokens were
								            // dropped all the way to the end of the input
								            $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
								        }

								        $context->destroy('CurrentToken');
								        return $kept;
								    }
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// vim: et sw=4 sts=4
							 |