208 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			PHP
		
	
	
		
		
			
		
	
	
			208 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			PHP
		
	
	
|  | <?php | ||
|  | 
 | ||
/**
 * Removes all unrecognized tags from the list of tokens.
 *
 * This strategy iterates through all the tokens and removes every tag that
 * the HTML definition does not recognize. If a TagTransform is defined for
 * an element, the element is transformed accordingly before it is checked.
 */
|  | 
 | ||
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{
    /**
     * Walks the token list once and returns only the tokens that survive.
     *
     * Per token:
     *  - Tag tokens are first passed through any tag transform registered for
     *    their name. A tag whose (possibly transformed) name is present in the
     *    HTML definition is kept; start/empty tags of elements with required
     *    attributes are validated and dropped when a required attribute is
     *    missing (for img only when Core.RemoveInvalidImg is enabled).
     *  - A tag unknown to the definition is turned into a text token when
     *    Core.EscapeInvalidTags is enabled, otherwise it is dropped. If its
     *    name is a hidden element, following tokens are dropped too until a
     *    tag with the same name is reached.
     *  - Comments inside a recognized hidden element become text tokens.
     *    Elsewhere a comment is kept only when HTML.Trusted is set or it
     *    matches HTML.AllowedComments / HTML.AllowedCommentsRegexp, after
     *    trailing hyphens are stripped and runs of hyphens are collapsed.
     *  - Text tokens are kept; any other token kind is dropped.
     *
     * While running, the token being processed is exposed in the context as
     * 'CurrentToken'. Messages are sent to the 'ErrorCollector' only when
     * Core.CollectErrors is enabled.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $definition = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $kept = array();

        $escape_invalid_tags = $config->get('Core.EscapeInvalidTags');
        $remove_invalid_img = $config->get('Core.RemoveInvalidImg');

        // Comment policy: trusted mode keeps every comment, otherwise only
        // those matched by the lookup table or the regular expression.
        $trusted = $config->get('HTML.Trusted');
        $comment_lookup = $config->get('HTML.AllowedComments');
        $comment_regexp = $config->get('HTML.AllowedCommentsRegexp');
        $filter_comments = $comment_lookup !== array() || $comment_regexp !== null;

        $remove_script_contents = $config->get('Core.RemoveScriptContents');
        $hidden = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents overrides the hidden-element entry for
        // script in either direction; any other value leaves the list alone.
        if ($remove_script_contents === true) {
            $hidden['script'] = true;
        } elseif ($remove_script_contents === false && isset($hidden['script'])) {
            unset($hidden['script']);
        }

        $validator = new HTMLPurifier_AttrValidator();

        // Name of the hidden element whose contents are being discarded,
        // or false when nothing is being skipped.
        $skip_until = false;

        // Name of the recognized hidden element we are inside of, or false;
        // comments are converted to text while this is set.
        $textify_inside = false;

        // $token is registered by reference, so every assignment to it below
        // (including the foreach itself) is visible through the context.
        $token = false;
        $context->register('CurrentToken', $token);

        $errors = false;
        if ($config->get('Core.CollectErrors')) {
            $errors =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            // While skipping, ignore everything except a tag carrying the
            // name we are waiting for; that tag is processed normally below.
            if ($skip_until && (empty($token->is_tag) || $token->name !== $skip_until)) {
                continue;
            }

            if (!empty($token->is_tag)) {
                // Apply a tag transform, if any, before looking the tag up.
                if (isset($definition->info_tag_transform[$token->name])) {
                    $original_name = $token->name;
                    $transform = $definition->info_tag_transform[$original_name];
                    $token = $transform->transform($token, $config, $context);
                    if ($errors) {
                        $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                    }
                }

                if (isset($definition->info[$token->name])) {
                    // Recognized element: only required attributes can still
                    // cause the tag to be dropped.
                    $element = $definition->info[$token->name];
                    $opens_element = $token instanceof HTMLPurifier_Token_Start
                        || $token instanceof HTMLPurifier_Token_Empty;

                    // img is exempt unless Core.RemoveInvalidImg is enabled.
                    if ($opens_element &&
                        $element->required_attr &&
                        ($token->name != 'img' || $remove_invalid_img)
                    ) {
                        $validator->validateToken($token, $config, $context);

                        $has_missing = false;
                        $missing_attr = null;
                        foreach ($element->required_attr as $required) {
                            if (!isset($token->attr[$required])) {
                                $has_missing = true;
                                $missing_attr = $required;
                                break;
                            }
                        }

                        if ($has_missing) {
                            if ($errors) {
                                $errors->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $missing_attr
                                );
                            }
                            continue;
                        }

                        // Attributes were validated above; presumably this
                        // flag lets later passes skip re-validation - confirm.
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // Track entry into / exit from a recognized hidden element.
                    if ($token instanceof HTMLPurifier_Token_Start && isset($hidden[$token->name])) {
                        $textify_inside = $token->name;
                    } elseif ($token instanceof HTMLPurifier_Token_End && $token->name === $textify_inside) {
                        $textify_inside = false;
                    }
                } elseif ($escape_invalid_tags) {
                    // Unknown tag: keep it, but as literal text built from
                    // its generated HTML representation.
                    if ($errors) {
                        $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // Unknown tag: drop it. For hidden elements also start
                    // (start tag) or stop (end tag) discarding the contents;
                    // an empty tag leaves the skip state untouched.
                    $is_hidden = isset($hidden[$token->name]);
                    if ($is_hidden) {
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            $skip_until = $token->name;
                        } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                            $skip_until = false;
                        }
                    }
                    if ($errors) {
                        $errors->send(
                            E_ERROR,
                            $is_hidden
                                ? 'Strategy_RemoveForeignElements: Foreign meta element removed'
                                : 'Strategy_RemoveForeignElements: Foreign element removed'
                        );
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                if ($textify_inside !== false) {
                    // Inside a recognized hidden element the comment body is
                    // treated as plain text.
                    $token = new HTMLPurifier_Token_Text($token->data);
                } elseif (!$trusted && !$filter_comments) {
                    // No comment policy configured: strip every comment.
                    if ($errors) {
                        $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                } else {
                    // The trailing-hyphen check is only needed for reporting.
                    $trailing_hyphen = $errors && substr($token->data, -1) == '-';

                    // Normalize the body: no trailing hyphens, no "--" runs.
                    $token->data = rtrim($token->data, '-');
                    $had_double_hyphen = strpos($token->data, '--') !== false;
                    while (strpos($token->data, '--') !== false) {
                        $token->data = str_replace('--', '-', $token->data);
                    }

                    $comment_text = trim($token->data);
                    $allowed = $trusted
                        || !empty($comment_lookup[$comment_text])
                        || ($comment_regexp !== null && preg_match($comment_regexp, $comment_text));

                    if (!$allowed) {
                        if ($errors) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    if ($errors) {
                        if ($trailing_hyphen) {
                            $errors->send(
                                E_NOTICE,
                                'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                            );
                        }
                        if ($had_double_hyphen) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                    }
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // Neither tag, comment nor text: drop silently.
                continue;
            }

            $kept[] = $token;
        }

        if ($skip_until && $errors) {
            // The closing tag never appeared, so everything up to the end of
            // the input was discarded.
            $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
        }

        $context->destroy('CurrentToken');
        return $kept;
    }
}
|  | 
 | ||
|  | // vim: et sw=4 sts=4
 |