72 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			PHP
		
	
	
		
		
			
		
	
	
			72 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			PHP
		
	
	
| 
								 | 
							
								<?php
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								/**
							 | 
						||
| 
								 | 
							
								 * Parses a URI into the components and fragment identifier as specified
							 | 
						||
| 
								 | 
							
								 * by RFC 3986.
							 | 
						||
| 
								 | 
							
								 */
							 | 
						||
| 
								 | 
							
								class HTMLPurifier_URIParser
								{

								    /**
								     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
								     */
								    protected $percentEncoder;

								    /**
								     * Creates the parser and the percent-encoder instance that parse()
								     * uses to normalize its input.
								     */
								    public function __construct()
								    {
								        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
								    }

								    /**
								     * Parses a URI.
								     * @param $uri string URI to parse
								     * @return HTMLPurifier_URI representation of URI. This representation has
								     *         not been validated yet and may not conform to RFC. Boolean
								     *         false is returned when the URI regexp fails to match.
								     */
								    public function parse($uri)
								    {
								        $uri = $this->percentEncoder->normalize($uri);

								        // Regexp is as per Appendix B.
								        // Note that ["<>] are an addition to the RFC's recommended
								        // characters, because they represent external delimiters.
								        // The numbers below are the capture groups holding the bare
								        // component; the group just before each optional one also
								        // contains its delimiter and is only used as a presence check.
								        $r_URI = '!'.
								            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
								            '(//([^/?#"<>]*))?'. // 4. Authority
								            '([^?#"<>]*)'.       // 5. Path
								            '(\?([^#"<>]*))?'.   // 7. Query
								            '(#([^"<>]*))?'.     // 9. Fragment
								            '!';

								        $matches = array();
								        $result = preg_match($r_URI, $uri, $matches);

								        if (!$result) return false; // *really* invalid URI

								        // separate out parts
								        // (the delimiter-carrying groups can never be the string "0",
								        // so empty() is a safe presence test for them)
								        $scheme     = !empty($matches[1]) ? $matches[2] : null;
								        $authority  = !empty($matches[3]) ? $matches[4] : null;
								        $path       = $matches[5]; // always present, can be empty
								        $query      = !empty($matches[6]) ? $matches[7] : null;
								        $fragment   = !empty($matches[8]) ? $matches[9] : null;

								        // further parse authority
								        if ($authority !== null) {
								            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
								            $matches = array();
								            preg_match($r_authority, $authority, $matches);
								            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
								            // isset() rather than !empty(): a host of "0" is falsy in
								            // PHP and would otherwise be silently replaced by ''.
								            $host       = isset($matches[3]) ? $matches[3] : '';
								            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
								        } else {
								            $port = $host = $userinfo = null;
								        }

								        return new HTMLPurifier_URI(
								            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
								    }

								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								// vim: et sw=4 sts=4
							 |