// Regex-driven lexer that turns marked-up text into HTML.
//
// The `syntax` object handed to the constructor is read as follows:
//   syntax.tokens      - array of token definitions, tried in order.  Each
//                        definition has a `regex` (pattern source string,
//                        matched at the start of the remaining text) and
//                        either a `create(match, def)` factory or a `token`
//                        constructor, which is invoked as
//                        `new def.token(match, def.args)`.
//   syntax.postProcess - optional function(stream), called with the finished
//                        token stream before it is serialized.
dojo.declare('dojox.markup.Parser', null,
{
    constructor: function (syntax)
    {
        this.syntax = syntax;
    },

    // Lexes `text` into a token stream and returns the stream rendered as an
    // HTML string.
    //
    // If no token definition consumes the remaining text, a syntax error is
    // reported through console.error and lexing stops; the tokens produced up
    // to that point are still post-processed and serialized.
    parse: function (text)
    {
        var defaultCreate = function (match, def) {
            return new def.token(match, def.args);
        };

        var defs = this.syntax.tokens;

        // Compile each pattern once per parse instead of once per loop pass.
        var patterns = this._compilePatterns(defs);

        // This is where the actual lexing happens.  The text
        // is progressively converted to a token stream.

        var stream = [];
        while ( text.length > 0 )
        {
            var found = false;

            for (var i = 0; i < defs.length; i++) {
                var def = defs[i];
                var match = patterns[i].exec(text);

                if (!match) {
                    continue;
                }

                // If one of the regex's matches, we:
                //
                //  (1) create the token and put it on the token stream.
                //  (2) remove the part of the text that matched.
                //

                var create = def.create || defaultCreate;
                var token = create(match, def);

                // A token may report how much text it consumed through a
                // truthy `length` property; otherwise the whole match is used.
                var length = token.length || match[0].length;

                // A match that consumes nothing would loop forever, so it is
                // ignored and the remaining definitions get their chance.
                if (length > 0) {
                    found = true;
                    text = text.substring(length);
                    stream.push(token);
                    break;
                }
            }

            if (!found) {
                // No tokens were matched, this is a syntax error.
                console.error('syntax error: "' + text + '"');
                break;
            }
        }

        if (this.syntax.postProcess) {
            this.syntax.postProcess(stream);
        }

        return this._serializeTokenStream(stream);
    },

    // Builds one RegExp per token definition, anchored to the start of the
    // input.  The pattern is wrapped in a non-capturing group so the anchor
    // applies to every alternative of a pattern like "a|b" (a bare "^a|b"
    // lets "b" match anywhere in the text) without renumbering the
    // definition's own capture groups.
    _compilePatterns: function (defs)
    {
        var patterns = [];
        for (var i = 0; i < defs.length; i++) {
            patterns.push(new RegExp("^(?:" + defs[i].regex + ")"));
        }
        return patterns;
    },

    // converts the token stream into HTML.
    //
    // Consecutive tokens of the same declaredClass whose `grouped` flag is
    // set are wrapped between one startGroup() and one endGroup() call.
    _serializeTokenStream: function (stream)
    {
        var html = '';
        var last_token = null;
        dojo.forEach(stream, function (token) {
            if ( !last_token || last_token.declaredClass != token.declaredClass ) {
                if (last_token && last_token.grouped) {
                    html += last_token.endGroup();
                }
                if (token.grouped) {
                    html += token.startGroup();
                }
            }
            last_token = token;

            html += token.toHTML();
        });

        // A group that runs to the end of the stream has no following token
        // to trigger its closing tag, so close it here.
        if (last_token && last_token.grouped) {
            html += last_token.endGroup();
        }

        return html;
    }
});

//
// In this approach, you would have a standard set of tokens, which
// would be used for all markup languages (of course, a language can
// make its own tokens if needed too).  Correctly designing how these
// work is the trickiest part of this approach.
//

// TODO: The BoldToken, ItalicToken and HeaderToken could all be 
// written as "SimpleInlineToken" or something, which takes an 
// args.tagName option, to decide what HTML tag it is.

// Inline token that renders its captured text in bold.
dojo.declare('dojox.markup.BoldToken', null, {
    // match: array-like match result; element 1 is the text to embolden.
    constructor: function (match) {
        var inner = match[1];
        this.value = inner;
    },
    // Returns the captured text wrapped in a <b> element.  The text is
    // inserted as-is, without HTML escaping.
    toHTML: function () {
        var tag = "b";
        return "<" + tag + ">" + this.value + "</" + tag + ">";
    }
});

// Inline token that renders its captured text in italics.
dojo.declare('dojox.markup.ItalicToken', null, {
    // match: array-like match result; element 1 is the text to italicize.
    constructor: function (match) {
        var inner = match[1];
        this.value = inner;
    },
    // Returns the captured text wrapped in an <i> element.  The text is
    // inserted as-is, without HTML escaping.
    toHTML: function () {
        var tag = "i";
        return "<" + tag + ">" + this.value + "</" + tag + ">";
    }
});

// Token for a heading; renders as <hN>, where N is the heading level.
dojo.declare('dojox.markup.HeaderToken', null, {
    // value: array-like match result; element 1 is the heading text.
    // args:  optional options object.  args.level selects the heading
    //        level and falls back to 1 when missing or falsy.
    constructor: function (value, args) {
        // A token definition may leave `args` out, in which case undefined
        // arrives here; treat that the same as an empty options object
        // instead of throwing on the property read.
        var options = args || {};
        this.value = value[1];
        this.level = options.level || 1;
    },
    // Returns the heading text wrapped in the <hN> element for this level.
    // The text is inserted as-is, without HTML escaping.
    toHTML: function () {
        var tag = 'h'+this.level;
        return '<'+tag+'>'+this.value+'</'+tag+'>';
    }
});

// Token for a hyperlink; renders as an <a> element.
dojo.declare('dojox.markup.LinkToken', null, {
    // match: array-like match result; element 1 is the link target and
    //        element 2 is the link text.
    constructor: function (match) {
        var target = match[1];
        var label = match[2];
        this.href  = target;
        this.value = label;
    },
    // Returns the anchor markup.  Both the target and the text are inserted
    // as-is, without HTML or attribute escaping.
    toHTML: function () {
        var openTag = '<a href="' + this.href + '">';
        var closeTag = '</a>';
        return openTag + this.value + closeTag;
    }
});

// Token for a single list item.  The `grouped` flag plus the
// startGroup()/endGroup() pair supply the markup that surrounds a run of
// list items.
dojo.declare('dojox.markup.ListItemToken', null, {
    grouped: true,
    // value: array-like match result; element 1 is the item text.
    // args:  optional options object.  A truthy args.ordered selects an
    //        ordered list; the default is an unordered list.
    constructor: function (value, args) {
        // A token definition may leave `args` out, in which case undefined
        // arrives here; treat that the same as an empty options object
        // instead of throwing on the property read.
        var options = args || {};
        this.value = value[1];
        this.ordered = options.ordered || false;
    },
    // Opening tag of the list that surrounds a run of items.
    startGroup: function () {
        return this.ordered ? '<ol>' : '<ul>';
    },
    // Closing tag of the list that surrounds a run of items.
    endGroup: function () {
        return this.ordered ? '</ol>' : '</ul>';
    },
    // Returns the item text wrapped in an <li> element.  The text is
    // inserted as-is, without HTML escaping.
    toHTML: function () {
        return '<li>'+this.value+'</li>';
    }
});

// Token for plain text that is passed through to the output unchanged.
dojo.declare('dojox.markup.TextToken', null, {
    // match: array-like match result.  Element 1 is used when it is truthy;
    //        otherwise element 0 is used.
    constructor: function (match) {
        var captured = match[1];
        this.value = captured ? captured : match[0];
    },
    // Returns the stored text as-is, without HTML escaping.
    toHTML: function () {
        var text = this.value;
        return text;
    }
});


