includes/js-compressor.php
author Dan
Fri, 05 Oct 2007 01:57:00 -0400
changeset 162 e1a22031b5bd
parent 142 ca9118d9c0f2
child 166 d53cc29308f4
permissions -rw-r--r--
Major revamps to the template parser. Fixed a few security holes that could allow PHP to be injected in untimely places in TPL code. Improved Ux for XSS attempt in tplWikiFormat. Documented many functions. Backported much cleaner parser from 2.0 branch. Beautified a lot of code in the depths of the template class. Pretty much a small-scale Extreme Makeover.

<?php

/*
 * Enano - an open-source CMS capable of wiki functions, Drupal-like sidebar blocks, and everything in between
 * Version 1.0.2 (Coblynau)
 * Copyright (C) 2006-2007 Dan Fuhry
 * Javascript compression library - used to compact the client-side Javascript code (all 72KB of it!) to save some bandwidth
 *
 * This program is Free Software; you can redistribute and/or modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for details.
 *
 * This class was written by Andrea Giammarchi and was downloaded from PHPClasses.org. The information page stated that
 * this class is licensed under the GNU General Public License, the terms of which can be found by reading the file
 * "GPL" included with the Enano package.
 */

/**
 * JavaScriptCompressor class,
 *	removes comments or pack JavaScript source[s] code.
 * ______________________________________________________________
 * JavaScriptCompressor (just 2 public methods)
 *    |
 *    |________ getClean(jsSource:mixed):string
 *    |         	returns one or more JavaScript code without comments,
 *    |         	by default removes some spaces too
 *    |
 *    |________ getPacked(jsSource:mixed):string
 *              	returns one or more JavaScript code packed,
 *	        	using getClean and obfuscating output
 * --------------------------------------------------------------
 * Note about $jsSource input varible:
 * 	this var should be a string (i.e. $jsSource = file_get_contents("myFile.js");)
 *      should be an array of strings (i.e. array(file_get_contents("1.js"), file_get_contents("2.js"), ... ))
 *      should be an array with 1 or 2 keys:
 *      	(i.e. array('code'=>file_get_contents("mySource.js")))
 *              (i.e. array('code'=>file_get_contents("mySource.js"), 'name'=>'mySource'))
 *      ... and should be an array of arrays created with theese rules
 *      array(
 *		file_get_contents("secret.js"),
 *              array('code'=>$anotherJS),
 *              array('code'=>$myJSapplication, 'name'=>'JSApplication V 1.0')
 *      )
 *
 *      The name used on dedicated key, will be write on parsed source header
 * --------------------------------------------------------------
 * Note about returned strings:
 * 	Your browser should wrap very long strings, then don't use
 *      cut and paste from your browser, save output into your database or directly
 *      in a file or print them only inside <script> and </script> tags
 * --------------------------------------------------------------
 * Note about parser performance:
 * 	With pure PHP embed code this class should be slow and not really safe
 *      for your server performance then don't parse JavaScript runtime for each
 *      file you need and create some "parsed" caching system
 *      (at least while i've not created a compiled version of theese class functions).
 *      Here there's a caching system example: http://www.phpclasses.org/browse/package/3158.html
 * --------------------------------------------------------------
 * Note about JavaScript packed compatibility:
 * 	To be sure about compatibility include before every script JSL Library:
 *      http://www.devpro.it/JSL/
 * JSL library add some features for old or buggy browsers, one of
 * those functions is String.replace with function as second argument,
 * used by JavaScript generated packed code to rebuild original code.
 *
 * Remember that KDE 3.5, Safari and IE5 will not work correctly with packed version
 * if you'll not include JSL.
 * --------------------------------------------------------------
 * @Compatibility	>= PHP 4
 * @Author		Andrea Giammarchi
 * @see       http://www.devpro.it/
 * @since	    2006/05/31
 * @since		  2006/08/01 [requires SourceMap.class.php to parse source faster and better (dojo.js.uncompressed.js file (211Kb) successfull cleaned or packed)]
 * @version		0.8
 * Dependencies:
 *      Server: BaseConvert.class.php
 *			Server: SourceMap.class.php
 *			Client: JSL.js (http://www.devpro.it/JSL/)
 * Convertion is supported by every browser with JSL Library (FF 1+ Opera 8+ and IE5.5+ are supported without JSL too)
 * @copyright Dean Edwards for his originally idea [dean.edwards.name] and his JavaScript packer
 */
class JavaScriptCompressor {

	/**
	 * public variables
         * 	stats:string		after every compression has some informations
         *      version:string		version of this class
	 */
	var	$stats = '',
		$version = '0.8';

	/** 'private' variables, any comment sorry */
	var	$__startTime = 0,
		$__sourceLength = 0,
		$__sourceNewLength = 0,
		$__totalSources = 0,
		$__sources = array(),
		$__delimeter = array(),
		$__cleanFinder = array("/(\n|\r)+/", "/( |\t)+/", "/(\n )|( \n)|( \n )/", "/[[:space:]]+(\)|})/", "/(\(|{)[[:space:]]+/", "/[[:space:]]*(;|,|:|<|>|\&|\||\=|\?|\+|\-|\%)[[:space:]]*/", "/\)[[:space:]]+{/", "/}[[:space:]]+\(/"),
		$__cleanReplacer = array("\n", " ", "\n", "\\1", "\\1", "\\1", "){", "}("),
		$__BC = null,
		$__SourceMap = null;

	/**
	 * public constructor
         * 	creates a new BaseConvert class variable (base 36)
	 */
	function JavaScriptCompressor() {
		$this->__SourceMap = new SourceMap();
		$this->__BC = new BaseConvert('0123456789abcdefghijklmnopqrstuvwxyz');
		$this->__delimeter = array(
			array('name'=>'doublequote', 'start'=>'"', 'end'=>'"', 'noslash'=>true),
			array('name'=>'singlequote', 'start'=>"'", 'end'=>"'", 'noslash'=>true),
			array('name'=>'singlelinecomment', 'start'=>'//', 'end'=>array("\n", "\r")),
			array('name'=>'multilinecomment', 'start'=>'/*', 'end'=>'*/'),
			array('name'=>'regexp', 'start'=>'/', 'end'=>'/', 'match'=>"/^\/[^\n\r]+\/$/", 'noslash'=>true)
		);
	}

	/**
	 * public method
         * 	getClean(mixed [, bool]):string
         *      compress JavaScript removing comments and somespaces (on by default)
         * @param	mixed		view example and notes on class comments
	 */
	function getClean($jsSource) {
		return $this->__commonInitMethods($jsSource, false);
	}
	
	/**
	 * public method
         * 	getPacked(mixed):string
         *      compress JavaScript replaceing words and removing comments and some spaces
         * @param	mixed		view example and notes on class comments
	 */
	function getPacked($jsSource) {
		return $this->__commonInitMethods($jsSource, true);
	}
	
	/** 'private' methods, any comment sorry */
	function __addCleanCode($str) {
		return preg_replace($this->__cleanFinder, $this->__cleanReplacer, trim($str));
	}
	function __addClean(&$arr, &$str, &$start, &$end, $clean) {
		if($clean)
			array_push($arr, $this->__addCleanCode(substr($str, $start, $end - $start)));
		else
			array_push($arr, substr($str, $start, $end - $start));
	}
	function __clean(&$str) {
		$len = strlen($str);
		$type = '';
		$clean = array();
		$map = $this->__SourceMap->getMap($str, $this->__delimeter);
		for($a = 0, $b = 0, $c = count($map); $a < $c; $a++) {
			$type = &$map[$a]['name'];
			switch($type) {
				case 'code':
				case 'regexp':
				case 'doublequote':
				case 'singlequote':
					$this->__addClean($clean, $str, $map[$a]['start'], $map[$a]['end'], ($type === 'code'));
					if($type !== 'regexp')
						array_push($clean, "\n");	
					break;
			}
 		}
		return preg_replace("/(\n)+/", "\n", trim(implode('', $clean)));
	}
	function __commonInitMethods(&$jsSource, $packed) { 
		$header = '';
		$this->__startTime = $this->__getTime();
		$this->__sourceLength = 0;
		$this->__sourceManager($jsSource);
		for($a = 0, $b = $this->__totalSources; $a < $b; $a++)
			$this->__sources[$a]['code'] = $this->__clean($this->__sources[$a]['code']);
		$header = $this->__getHeader();
		for($a = 0, $b = $this->__totalSources; $a < $b; $a++)
			$this->__sources[$a] = &$this->__sources[$a]['code'];
		$this->__sources = implode(';', $this->__sources);
		if($packed)
			$this->__sources = $this->__pack($this->__sources);
		$this->__sourceNewLength = strlen($this->__sources);
		$this->__setStats();
		return $header.$this->__sources;
	}
	function __getHeader() {
		return implode('', array(
			'/* ',$this->__getScriptNames(),'JavaScriptCompressor ',$this->version,' [www.devpro.it], ',
			'thanks to Dean Edwards for idea [dean.edwards.name]',
			" */\r\n"		
		));
	}
	function __getScriptNames() {
		$a = 0;
		$result = array();
		for($b = $this->__totalSources; $a < $b; $a++) {
			if($this->__sources[$a]['name'] !== '')
				array_push($result, $this->__sources[$a]['name']);
		}
		$a = count($result);
		if($a-- > 0)
			$result[$a] .= ' with ';
		return $a < 0 ? '' : implode(', ', $result);
	}
	function __getSize($size, $dec = 2) {
		$toEval = '';
		$type = array('bytes', 'Kb', 'Mb', 'Gb');
		$nsize = $size;
		$times = 0;
		while($nsize > 1024) {
			$nsize = $nsize / 1024;
			$toEval .= '/1024';
			$times++;
		}
		if($times === 0)
			$fSize = $size.' '.$type[$times];
		else {
			eval('$size=($size'.$toEval.');');
			$fSize =  number_format($size, $dec, '.', '').' '.$type[$times];
		}
		return $fSize;
	}
	function __getTime($startTime = null) {
		list($usec, $sec) = explode(' ', microtime());
		$newtime = (float)$usec + (float)$sec;
		if($startTime !== null)
			$newtime = number_format(($newtime - $startTime), 3);
		return $newtime;
	}
	function __pack(&$str) {
		$container = array();
		$str = preg_replace("/(\w+)/e", '$this->__BC->toBase($this->__wordsParser("\\1",$container));', $this->__clean($str));
		$str = str_replace("\n", '\n', addslashes($str));
		return 'eval(function(A,G){return A.replace(/(\\w+)/g,function(a,b){return G[parseInt(b,36)]})}("'.$str.'","'.implode(',', $container).'".split(",")));';
	}
	function __setStats() {
		$this->stats = implode(' ', array(
			$this->__getSize($this->__sourceLength),
			'to',
			$this->__getSize($this->__sourceNewLength),
			'in',
			$this->__getTime($this->__startTime),
			'seconds'
		));
	}
	function __sourceManager(&$jsSource) {
		$b = count($jsSource);
		$this->__sources = array();
		if(is_string($jsSource))
			$this->__sourcePusher($jsSource, '');
		elseif(is_array($jsSource) && $b > 0) {
			if(isset($jsSource['code']))
				$this->__sourcePusher($jsSource['code'], (isset($jsSource['name']) ? $jsSource['name'] : ''));
			else {
				for($a = 0; $a < $b; $a++) {
					if(is_array($jsSource[$a]) && isset($jsSource[$a]['code'], $jsSource[$a]['name']))
						$this->__sourcePusher($jsSource[$a]['code'], trim($jsSource[$a]['name']));
					elseif(is_string($jsSource[$a]))
						$this->__sourcePusher($jsSource[$a], '');
				}
			}
		}
		$this->__totalSources = count($this->__sources);
	}
	function __sourcePusher(&$code, $name) {
		$this->__sourceLength += strlen($code);
		array_push($this->__sources, array('code'=>$code, 'name'=>$name));
	}
	function __wordsParser($str, &$d) {
		if(is_null($key = array_shift($key = array_keys($d,$str))))
			$key = array_push($d, $str) - 1;
		return $key;
	}
}

/**
 * BaseConvert class,
 *	converts an unsigned base 10 integer to a different base and vice versa.
 * ______________________________________________________________
 * BaseConvert
 *    |
 *    |________ constructor(newBase:string)
 *    |         	uses newBase string var for convertion
 *    |                 [i.e. "0123456789abcdef" for an hex convertion]
 *    |
 *    |________ toBase(unsignedInteger:uint):string
 *    |         	return base value of input
 *    |
 *    |________ fromBase(baseString:string):uint
 *              	return base 10 integer value of base input
 * --------------------------------------------------------------
 * REMEMBER: PHP < 6 doesn't work correctly with integer greater than 2147483647 (2^31 - 1)
 * --------------------------------------------------------------
 * @Compatibility	>= PHP 4
 * @Author		Andrea Giammarchi
 * @Site		http://www.devpro.it/
 * @Date		2006/06/05
 * @Version		1.0
 */

class BaseConvert {
	
	var	$base, $baseLength;
	
	function BaseConvert($base) {
		$this->base = &$base;
		$this->baseLength = strlen($base);
	}
	
	function toBase($num) {
		$module = 0; $result = '';
		while($num) {
			$result = $this->base{($module = $num % $this->baseLength)}.$result;
			$num = (int)(($num - $module) / $this->baseLength);
		}
		return $result !== '' ? $result : $this->base{0};
	}
	
	function fromBase($str) {
		$pos = 0; $len = strlen($str) - 1; $result = 0;
		while($pos < $len)
			$result += pow($this->baseLength, ($len - $pos)) * strpos($this->base, $str{($pos++)});
		return $len >= 0 ? $result + strpos($this->base, $str{($pos)}) : null;
	}
}

/**
* SourceMap class,
*    reads a generic language source code and returns its map.
* ______________________________________________________________
* The SourceMap goals is to create a map of a generic script/program language.
* The getMap method returns an array/list of arrays/dictionary/objects
* of source map using delimeters variable to map correctly:
*  - multi line comments
*  - single line comments
*  - double quoted strings
*  - single quoted strings
*  - pure code
*  - everything else (for example regexp [/re/] with javascript), just adding a correct delimeter
* --------------------------------------------------------------
* What about the delimeter
*     It's an array/list of arrays/dictionary/obects with some properties to find what you're looking for.
*
* parameters are:
*  - name, the name of the delimeter (i.e. "doublequote")
*  - start, one or mode chars to find as start delimeter (i.e. " for double quoted string)
*  - end, one or mode chars to find as end delimeter (i.e. " for double quoted string) [end should be an array/list too]
*
* optional parameters are:
*  - noslash, if true find the end of the delimeter only if last char is not slashed (i.e. "string\"test" find " after test)
*  - match, if choosed language has regexp, verify if string from start to end matches used regexp (i.e. /^\/[^\n\r]+\/$/ for JavaScript regexp)
*
* If end parameter is an array, match and noslash are not supported (i.e. ["\n", "\r"] for end delimeter of a single line comment)
* --------------------------------------------------------------
* What about SourceMap usage
*     It should be a good solution to create sintax highlighter, parser,
*     verifier or some other source code parsing procedure
* --------------------------------------------------------------
* What about SourceMap performance script/languages
*     I've created different version of this class to test each script/program language performance too.
* Python with or without Psyco is actually the faster parser.
* However with this PHP version this class has mapped "dojo.js.uncompressed.js" file (about 211Kb) in less than 0.5 second.
* Test has been done with embed class and PHP as module, any accelerator was used for this PHP test.
* --------------------------------------------------------------
* @Compatibility    >= PHP 4
* @Author        Andrea Giammarchi
* @Site        http://www.devpro.it/
* @Date        2006/08/01
* @LastMOd        2006/08/01
* @Version        0.1
* @Application        Last version of JavaScriptCompressor class use this one to map source code.
*/
class SourceMap {
    
    /**
     * public method
         *     getMap(&$source:string, &$delimeters:array):array
     * Maps the source code using $delimeters rules and returns map as an array
         * NOTE: read comments to know more about map and delimeter
         *
         * @param    string        generic source code
         * @param    array        array with nested array with code rules
     */
    function getMap(&$source, &$delimeters) {
        
        # "unsigned" integer variables
        $sourcePosition = 0;
        $delimetersPosition = 0;
        $findLength = 0;
        $len = 0;
        $tempIndex = 0;
        $sourceLength = strlen($source);
        $delimetersLength = count($delimeters);
        
        # integer variables
        $tempPosition = -1;
        $endPosition = -1;
        
        # array variables
        $map = array();
        $tempMap = array();
        $tempDelimeter = array();
        
        while($sourcePosition < $sourceLength) {
            $endPosition = -1;
            for($delimetersPosition = 0; $delimetersPosition < $delimetersLength; $delimetersPosition++) {
                $tempPosition = strpos($source, $delimeters[$delimetersPosition]['start'], $sourcePosition);
                if($tempPosition !== false && ($tempPosition < $endPosition || $endPosition === -1)) {
                    $endPosition = $tempPosition;
                    $tempIndex = $delimetersPosition;
                }
            }
            if($endPosition !== -1) {
                $sourcePosition = $endPosition;
                $tempDelimeter = &$delimeters[$tempIndex];
                $findLength = strlen($tempDelimeter['start']);
                if(is_array($tempDelimeter['end'])) {
                    $delimetersPosition = 0;
                    $endPosition = -1;
                    for($len = count($tempDelimeter['end']); $delimetersPosition < $len; $delimetersPosition++) {
                        $tempPosition = strpos($source, $tempDelimeter['end'][$delimetersPosition], $sourcePosition + $findLength);
                        if($tempPosition !== false && ($tempPosition < $endPosition || $endPosition === -1)) {
                            $endPosition = $tempPosition;
                            $tempIndex = $delimetersPosition;
                        }    
                    }
                    if($endPosition !== -1)
                        $endPosition = $endPosition + strlen($tempDelimeter['end'][$tempIndex]);
                    else
                        $endPosition = $sourceLength;
                    array_push($map, array('name'=>$tempDelimeter['name'], 'start'=>$sourcePosition, 'end'=>$endPosition));
                    $sourcePosition = $endPosition - 1;
                }
                elseif(isset($tempDelimeter['match'])) {
                    $tempPosition = strpos($source, $tempDelimeter['end'], $sourcePosition + $findLength);
                    $len = strlen($tempDelimeter['end']);
                    if($tempPosition !== false && preg_match($tempDelimeter['match'], substr($source, $sourcePosition, $tempPosition - $sourcePosition + $len))) {
                        $endPosition = isset($tempDelimeter['noslash']) ? $this->__endCharNoSlash($source, $sourcePosition, $tempDelimeter['end'], $sourceLength) : $tempPosition + $len;
                        array_push($map, array('name'=>$tempDelimeter['name'], 'start'=>$sourcePosition, 'end'=>$endPosition));
                        $sourcePosition = $endPosition - 1;
                    }
                }
                else {
                    if(isset($tempDelimeter['noslash']))
                        $endPosition = $this->__endCharNoSlash($source, $sourcePosition, $tempDelimeter['end'], $sourceLength);
                    else {
                        $tempPosition = strpos($source, $tempDelimeter['end'], $sourcePosition + $findLength);
                        if($tempPosition !== false)
                            $endPosition = $tempPosition + strlen($tempDelimeter['end']);
                        else
                            $endPosition = $sourceLength;
                    }
                    array_push($map, array('name'=>$tempDelimeter['name'], 'start'=>$sourcePosition, 'end'=>$endPosition));
                    $sourcePosition = $endPosition - 1;
                }
            }
            else
                $sourcePosition = $sourceLength - 1;
            ++$sourcePosition;
        }
        $len = count($map);
        if($len === 0)
            array_push($tempMap, array('name'=>'code', 'start'=>0, 'end'=>$sourceLength));
        else {
            for($tempIndex = 0; $tempIndex < $len; $tempIndex++) {
                if($tempIndex === 0 && $map[$tempIndex]['start'] > 0)
                    array_push($tempMap, array('name'=>'code', 'start'=>0, 'end'=>$map[$tempIndex]['start']));
                elseif($tempIndex > 0 && $map[$tempIndex]['start'] > $map[$tempIndex-1]['end'])
                    array_push($tempMap, array('name'=>'code', 'start'=>$map[$tempIndex-1]['end'], 'end'=>$map[$tempIndex]['start']));
                array_push($tempMap, array('name'=>$map[$tempIndex]['name'], 'start'=>$map[$tempIndex]['start'], 'end'=>$map[$tempIndex]['end']));
                if($tempIndex + 1 === $len && $map[$tempIndex]['end'] < $sourceLength)
                    array_push($tempMap, array('name'=>'code', 'start'=>$map[$tempIndex]['end'], 'end'=>$sourceLength));
            }
        }
        return $tempMap;
    }
    
    function __endCharNoSlash(&$source, $position, &$find, &$len) {
        $temp = strlen($find);
        do {
            $position = strpos($source, $find, $position + 1);
        }while($position !== false && !$this->__charNoSlash($source, $position));
        if($position === false) $position = $len - $temp;
        return $position + $temp;
    }
    
    function __charNoSlash(&$source, &$position) {
        $next = 1; $len = $position - $next;
        while($len > 0 && $source{$len} === '\\') $len = $position - (++$next);
        return (($next - 1) % 2 === 0);
    }
}
?>