// Source: afPegger::Peg.fan


** Parsing Expression Grammar (PEG)
** 
@Js
class Peg {
    private PegCtx  pegCtx
    private Rule    rule
    
    ** Creates a PEG class ready to match the given string and rule.
    new makeRule(Str str, Rule rule) {
        // Build the parsing context from the caller's arguments, then keep
        // hold of the rule so that match() can process it later.
        this.pegCtx = PegCtx(str, rule)
        this.rule   = rule
    }

    ** Parses the given PEG pattern in to a rule.
    ** Convenience for:
    ** 
    **   syntax: fantom 
    **   Peg(str, Peg.parseRule(pattern))
    ** 
    ** See `Rule.parseRule`
    new makePattern(Str str, Str rulePattern) {
        // Compile the pattern once; the same Rule instance is stored on this
        // object and handed to the parsing context.
        compiled    := Peg.parseRule(rulePattern)
        this.rule    = compiled
        this.pegCtx  = PegCtx(str, compiled)
    }

    // ---- Static methods ----
    
    ** Turns debug messaging on and off.
    static Void debugOn(Bool debug := true) {
        // 'debug' switches the pod's logger to debug level; anything else resets it to info
        level := debug ? LogLevel.debug : LogLevel.info
        Peg#.pod.log.level = level
    }
    
    ** Parses a pattern into a simple rule.
    ** For example:
    ** 
    **   syntax: fantom 
    **   parseRule("[abc] / [xyz]")
    ** 
    ** See `Rule.parseRule`
    static Rule parseRule(Str pattern) {
        // a fresh PegGrammar instance does the actual parsing
        grammar := PegGrammar()
        return grammar.parseRule(pattern)
    }
    
    ** Parses a list of PEG grammar definitions.
    ** For example:
    ** 
    **   syntax: fantom 
    **   parseGrammar("a <- [abc] / [xyz] / b
    **                 b <- \space+ [^abc]")
    ** 
    ** See `Grammar.parseGrammar`
    static Grammar parseGrammar(Str grammar) {
        // a fresh PegGrammar instance does the actual parsing
        parser := PegGrammar()
        return parser.parseGrammar(grammar)
    }
    
    ** Returns the grammar PEG used to parse PEG grammar.
    ** 
    ** It's not particularly useful, but it may be interesting to some.
    static Grammar pegGrammar() {
        // simply exposes the grammar held by PegGrammar
        return PegGrammar.pegGrammar
    }
    
    // ---- Instance methods ----

    ** Returns the matched text of the next PEG match, optionally returning the contents of the given label. 
    ** 
    **   syntax: fantom
    **   peg := Peg("a12 + b23 / c69 * z666", "foo:[a-z] [0-9]+")
    **   peg.find         // --> "a12"
    **   peg.find         // --> "b23"
    **   peg.find("foo")  // --> "c"
    **   peg.find("foo")  // --> "z"
    ** 
    ** See [matched()]`matched` to match against the entire string.
    Str? find(Str? label := null, Int? offset := null) {
        found := search(offset)

        // no label - the caller wants the text of the whole match
        if (label == null)
            return found?.matched

        // otherwise drill down to the labelled sub-match (null if there is none)
        return found?.findMatch(label)?.matched
    }
    
    ** Searches for the next PEG match and replaces a portion of it.
    ** 
    ** 'replacements' is a map of label names to replacement values.
    ** 
    ** Use the label 'root' to replace the entire Match.
    ** 
    ** Use standard '${interpolation}' in values to reference other matched labels.
    ** Interpolation may be escaped with a backslash - '\${not interpolated}'.
    ** 
    **   syntax: fantom
    **   str := "(34) -> {12}"
    **   pat := "foo:[0-9]+ ' -> ' bar:[0-9]+"
    **   peg := Peg(str, pat)
    **   val := peg.replace(["foo":"\${bar}", "bar":"\${foo}"])
    **       // --> (12) -> {34}
    ** 
    Str replace(Str:Str replacements, Int? offset := null) {
        found := search(offset)

        // nothing matched - hand the input back untouched
        if (found == null)
            return pegCtx.str

        // apply the replacements to a mutable copy of the input
        original := pegCtx.str
        buf      := StrBuf(original.size)
        buf.add(original)
        doReplace(buf, found, replacements)
        return buf.toStr
    }
    
    ** Replaces all occurrences of a PEG match.
    ** 
    ** 'replacements' is a map of label names to replacement values.
    ** 
    ** Use the label 'root' to replace the entire Match.
    ** 
    ** Use standard '${interpolation}' in values to reference other matched labels.
    ** Interpolation may be escaped with a backslash - '\${this is not interpolated}'.
    **
    ** If 'offset' is given, scanning starts there; text before 'offset' is returned unchanged.
    ** An 'offset' at or beyond the end of the string results in no replacements.
    ** A negative 'offset' is rejected by 'match()' with an 'ArgErr'.
    **
    **   syntax: fantom
    **   str := "I like abba!"
    **   pat := "foo:'a' / bar:'b'"
    **   peg := Peg(str, pat)
    **   val := peg.replaceAll(["foo":"b", "bar":"a"])
    **       // --> I like baab!
    ** 
    Str replaceAll(Str:Str replacements, Int? offset := null) {
        // collect the matches first, starting at the requested offset
        matches := Match[,]
        pos     := offset ?: 0
        while (pos < pegCtx.str.size) {
            m := match(pos)
            if (m == null) {
                pos++
                continue
            }
            matches.add(m)

            // resume after the match; a match that consumed nothing leaves the
            // position where it was, so step forward one char to avoid looping forever
            next := pegCtx.cur
            pos = next > pos ? next : pos + 1
        }

        str := StrBuf(pegCtx.str.size).add(pegCtx.str)
        // process the actual replacements in reverse order
        // so earlier ranges are still valid after later ones have been rewritten
        matches.eachr |m| {
            doReplace(str, m, replacements)
        }
        return str.toStr
    }
    

    // replaceAllFn() is similar to searchAll().eachr |m| { str.replace(m.matchedRange, "sub") }
    // except that replaceAllFn() also performs interpolation / substitution
    ** Calls the given fn on each match, that returns the replacements to be performed. 
    ** 
    ** 'replacements' is a map of label names to replacement values.
    ** 
    ** Use the label 'root' to replace the entire Match.
    ** 
    ** Use standard '${interpolation}' in values to reference other matched labels.
    ** Interpolation may be escaped with a backslash - '\${this is not interpolated}'.
    ** 
    Str replaceAllFn(|Match -> Str:Str| fn) {
        // first pass: remember every match together with the replacements fn asked for
        found := Match[,]
        repls := [Str:Str][,]
        each |m| {
            wanted := fn(m)
            found.add(m)
            repls.add(wanted)
        }

        // second pass: rewrite a copy of the input, working backwards from the last match
        buf := StrBuf(pegCtx.str.size)
        buf.add(pegCtx.str)
        found.eachr |m, i| {
            doReplace(buf, m, repls[i])
        }
        return buf.toStr
    }
    
    ** Searches for the next PEG match, optionally starting from the given offset, and returns it.
    ** 
    ** Returns 'null' if there are no more matches.
    Match? search(Int? offset := null) {
        if (offset != null)
            pegCtx.rollbackToPos(offset)

        // try a match at the current position; on failure step forward one
        // character and try again, until a match is found or the input runs out
        while (!pegCtx.eos) {
            found := match
            if (found != null)
                return found
            pegCtx.rollbackToPos(pegCtx.cur + 1)
        }

        return null
    }
    
    ** Returns all matches from the given offset.
    ** If no offset is given, searching continues from the current position.
    ** 
    ** A match that consumes no characters is returned once, after which the position is
    ** moved on by one character so that the search always makes progress.
    Match[] searchAll(Int? offset := null) {
        if (offset != null)
            pegCtx.rollbackToPos(offset)

        matches := Match[,]
        while (true) {
            found := search
            if (found == null)
                break
            matches.add(found)

            // An empty match leaves the position untouched, so the next search()
            // would return the very same match again - forever. Step over it.
            if (found.matched.isEmpty && !pegCtx.eos)
                pegCtx.rollbackToPos(pegCtx.cur + 1)
        }
        return matches
    }

    ** Returns 'true' if the string contains a PEG match.
    ** Convenience for:
    **  
    **   syntax: fantom
    **   search != null
    ** 
    ** See [matches()]`matches` to check for a match against the entire string.
    Bool contains() {
        // a non-null search result means a match exists somewhere ahead of the current position
        found := search
        return found != null
    }
    
    ** Performs a PEG match against the entire string (from the given offset) - use for matching grammars.
    ** 
    ** Returns 'null' if the rule does not match.
    ** 
    ** Throws 'ArgErr' if 'offset' is given and is negative, or not less than the string size.
    ** 
    ** See [search()]`search` to *search* for a match anywhere in the string. 
    Match? match(Int? offset := null) {
        if (offset != null) {
            if (offset < 0 || offset >= pegCtx.str.size)
                throw ArgErr("Offset '${offset}' is out of bounds: 0 <= offset <= ${pegCtx.str.size-1}")
            pegCtx.rollbackToPos(offset)
        }

        // it's important to search() that we do NOT roll-back to position 0 
        // when no offset is given - matching carries on from the current position

        return pegCtx.clearResults.process(rule)
            ? pegCtx.doSuccess
            : null
    }
    
    ** Performs a PEG match against the entire string.
    ** Returns 'true' if it matches.
    ** 
    ** See [contains()]`contains` to *search* for a match instead.
    Bool matches() {
        // delegates to match(); any non-null Match counts as success
        result := match
        return result != null
    }
    
    ** Performs a PEG match against the entire string.
    ** Returns the matched string.
    ** 
    ** See [find()]`find` to *search* for a match anywhere in the string.
    Str? matched() {
        // null when match() finds nothing, otherwise the text of the match
        result := match
        return result?.matched
    }
    
    ** Searches the string and calls the given function for each PEG Match found.
    ** 
    ** Scanning always starts at the beginning of the string. A match that consumes no 
    ** characters is reported once, then scanning resumes at the next character.
    ** 
    ** Also see [search()]`search`, and [eachWhile()]`eachWhile`.
    Void each(|Match| fn) {
        c := 0
        while (c < pegCtx.str.size) {
            m := match(c)
            if (m == null) {
                // nothing here - try the next character
                c++
                continue
            }

            fn(m)

            // resume after the match; if the position did not move on (a zero-width
            // match) step forward one char, otherwise this loop would never end
            next := pegCtx.cur
            c = next > c ? next : c + 1
        }
    }

    ** Searches the string and calls the given function for each PEG Match found, until a non-null result is returned.
    ** Returns 'null' if there are no matches.
    ** 
    ** Scanning always starts at the beginning of the string. A match that consumes no 
    ** characters is reported once, then scanning resumes at the next character.
    ** 
    ** Also see [search()]`search`, and [each()]`each`.
    Obj? eachWhile(|Match->Obj?| fn) {
        r := null
        c := 0
        while (r == null && c < pegCtx.str.size) {
            m := match(c)
            if (m == null) {
                // nothing here - try the next character
                c++
                continue
            }

            r = fn(m)

            // resume after the match; if the position did not move on (a zero-width
            // match) step forward one char, otherwise this loop would never end
            next := pegCtx.cur
            c = next > c ? next : c + 1
        }
        return r
    }
    
    ** Moves the current position to the given offset; searching will start at this position.
    ** Negative values may be used to denote positions relative to the end of the string.
    @NoDoc  // this shouldn't be needed, given the optional 'offset' params on the other methods
    Void seek(Int offset) {
        // repositioning is handled entirely by the parsing context
        this.pegCtx.rollbackToPos(offset)
    }
    
    // Matches '${label}', capturing the label name in group 1.
    // Backslash escaping is handled in doReplace() by looking at the preceding character;
    // a look-behind is deliberately not used in the pattern as this class is also compiled to JS.
    private Regex interpolex := "\\\$\\{([^}]+)\\}".toRegex

    // Applies 'replacements' (label name -> replacement value) to the portion of 'str'
    // covered by 'match'. 'str' is modified in place.
    // 
    // Labels that are not found in 'match' are ignored.
    // '${label}' in a value is substituted with the text matched by that label (or "" if 
    // the label did not match). '\${label}' is NOT substituted; the escaping backslash is 
    // removed and the rest is emitted verbatim.
    private Void doReplace(StrBuf str, Match match, Str:Str replacements) {
        // find the sub-matches that actually have a replacement
        matched := Match[,]
        replacements.each |to, from| {
            fromM := match.findMatch(from)
            if (fromM != null)
                matched.add(fromM)
        }

        // reverse the order, to process replacements from the end
        // so we don't mess up the str / ranges for the next replace
        matched.sortr |m1, m2| { m1.matchedRange.start <=> m2.matchedRange.start }

        matched.each |fromM| {
            to := replacements[fromM.name]

            // do the interpolation of replacement values
            // (range within 'to' -> text to put there)
            interpols := Range:Str[:]
            interpols.ordered = true
            rm := interpolex.matcher(to)
            while (rm.find) {
                start := rm.start(0)

                // escaped with a backslash - drop the backslash, keep '${...}' as is
                if (start > 0 && to[start - 1] == '\\') {
                    interpols[(start - 1)..<start] = ""
                    continue
                }

                // only the '${...}' itself is replaced, never the character before it
                label := rm.group(1)
                repla := match.findMatch(label)?.matched ?: ""
                interpols[start..<rm.end(0)] = repla
            }

            // substitute from the end backwards so earlier ranges remain valid
            toBuf := StrBuf(to.size).add(to)
            keys  := interpols.keys.sortr |i1, i2| { i1.start <=> i2.start }
            keys.each |r| {
                val := interpols[r]
                toBuf.replaceRange(r, val)
            }
            to = toBuf.toStr

            // do the replacement
            str.replaceRange(fromM.matchedRange, to)
        }
    }
}