//
// Copyright (c) 2009, SkyFoundry LLC
// All Rights Reserved
//
// History:
// 06 Jun 09 Brian Frank Creation
// 28 Dec 09 Brian Frank DataReader => ZincReader
//
**
** ZincReader deserializes Grids from an input stream.
** See [docSkyspark]`docSkySpark::Zinc`
**
@Js
class ZincReader : GridReader
{
//////////////////////////////////////////////////////////////////////////
// Constructor
//////////////////////////////////////////////////////////////////////////
** Wrap the given input stream.  The two character lookahead
** and the first token are read eagerly during construction.
new make(InStream in)
{
  this.in = in
  // prime cur/peek before tokenizing; consumeToken inspects both
  cur  = readChar
  peek = readChar
  consumeToken
}
//////////////////////////////////////////////////////////////////////////
// Public
//////////////////////////////////////////////////////////////////////////
**
** Read a single grid from the stream: the version declaration
** and grid meta, a newline, the column definitions, then the rows.
**
override Grid readGrid()
{
  readVer
  gridMeta := readMeta
  consumeNewline
  gridCols := readCols
  grid := ZincGrid(gridMeta, gridCols)
  readRows(grid)
  return grid
}
**
** Read a list of grids separated by blank line from stream.
** Grids are read for as long as the current token is an identifier.
**
override Grid[] readGrids()
{
  acc := Grid[,]
  while (true)
  {
    if (tok != identifier) break
    acc.add(readGrid)
  }
  return acc
}
**
** Read a set of tags as name/value pairs formatted as "name: val"
** separated by space.  If val is omitted, then Marker.val is assumed.
** This is the same as the Zinc 'meta' production.
** Also see `ZincWriter.tagsToStr`.
**
Dict readTags()
{
  return readMeta
}
**
** Read a single scalar value (Bool, Int, Str, Uri, etc) from the
** current token.  Also see `ZincWriter.scalarToStr`.
**
Obj readScalar()
{
  return consumeScalar
}
** Close the underlying stream and return the result
** of the stream's own close call.
Bool close()
{
  return in.close
}
//////////////////////////////////////////////////////////////////////////
// Implementation
//////////////////////////////////////////////////////////////////////////
** Parse the 'ver:"x.y"' declaration and record the result
** in the version field.  Only "1.0" and "2.0" are accepted.
private Void readVer()
{
  verifyId("ver")
  consumeToken
  verify(':')
  consumeToken

  // val now holds the value of the version literal
  switch (val)
  {
    case "2.0": version = 2
    case "1.0": version = 1
    default:    throw IOErr("Unsupported version: $val")
  }
  consumeToken
}
** Parse zero or more 'name' or 'name:val' tags until a comma,
** closing brace, newline or end of stream.  A tag without a value
** maps to Marker.val.  Returns Etc.emptyDict when no tags are found.
internal Dict readMeta()
{
  // map is allocated lazily so the empty case avoids the allocation
  [Str:Obj?]? tags := null
  while (true)
  {
    if (tok == ',' || tok == '}' || tok == newline || tok == eof) break
    if (tags == null) tags = Str:Obj?[:]

    name := consumeId
    Obj? tagVal := Marker.val
    if (tok == ':')
    {
      consumeToken
      tagVal = consumeScalar
    }
    tags.add(name, tagVal)
  }

  if (tags == null) return Etc.emptyDict
  return Etc.makeDict(tags)
}
** Parse the comma separated column definitions.  At least one
** column is always read; the trailing newline is consumed unless
** the stream ended.
private ZincCol[] readCols()
{
  acc := ZincCol[,]
  acc.add(readCol(acc.size))
  while (tok == ',')
  {
    consumeToken
    acc.add(readCol(acc.size))
  }
  if (tok != eof) consumeNewline
  return acc
}
** Parse one column: its name, an optional Str display literal
** (rejected for version 2 or later), then its meta tags.  When a
** display literal was read it is stored in the meta as "dis".
private ZincCol readCol(Int index)
{
  colName := consumeId

  Str? colDis := null
  if (tok == scalar && val is Str)
  {
    // the literal is consumed before the version check
    colDis = consumeScalar
    if (version >= 2) throw Err("Invalid col dis in 2.0 grid: $colName")
  }

  colMeta := readMeta
  if (colDis != null) colMeta = Etc.dictSet(colMeta, "dis", colDis)
  return ZincCol(colName, colMeta)
}
** Read rows into the grid until a newline token (blank line) or
** end of stream, then consume the terminating newline if present.
private Void readRows(ZincGrid grid)
{
  width := grid.cols.size
  while (true)
  {
    if (tok == newline || tok == eof) break
    grid.rows.add(readRow(grid, width))
  }
  if (tok != eof) consumeNewline
}
** Read one row of numCols cells.  Every cell except the last must
** be followed by a comma; the last is followed by a newline unless
** the stream ended.
private ZincRow readRow(ZincGrid grid, Int numCols)
{
  row := Obj?[,]
  row.capacity = numCols

  // all cells but the last are comma terminated
  remaining := numCols - 1
  while (remaining > 0)
  {
    row.add(readCell)
    verify(',')
    consumeToken
    remaining--
  }

  row.add(readCell)
  if (tok != eof) consumeNewline
  return ZincRow(grid.ref, row)
}
** Read one cell.  An empty cell (comma, newline or end of stream)
** yields null.  An optional Str display literal and meta tags after
** the scalar are parsed; they raise an error for version 2 or later
** and are otherwise discarded.
private Obj? readCell()
{
  if (tok == ',' || tok == newline || tok == eof) return null

  cell := consumeScalar

  Str? dis := null
  if (tok == scalar && val is Str) dis = consumeScalar
  meta := readMeta

  hasExtras := dis != null || !meta.isEmpty
  if (hasExtras && version >= 2) throw Err("Invalid cell dis/meta in 2.0 grid: $dis $meta")
  return cell
}
//////////////////////////////////////////////////////////////////////////
// Diff
//////////////////////////////////////////////////////////////////////////
// Flag values passed to makeDiff by readDiff for the '^', '+' and '-'
// diff prefixes respectively.
// Must be in-sync with proj::Diff (we mask with force flag)
// NOTE(review): 0x08 is presumably the force flag that is or'ed into
// the add/remove values - confirm against proj::Diff
private static const Int diffUpdate := 0x08
private static const Int diffAdd := 0x01.or(0x08)
private static const Int diffRemove := 0x02.or(0x08)
**
** Support to read proj::Diff - must override makeDiff.
**
** Skips lines starting with '#', then parses one diff formatted as
** a '^', '+' or '-' symbol followed by '{id:<ref>, mod:<datetime>'
** and zero or more ', name' or ', name:val' tags and a closing '}'.
** The diff must be terminated by a newline or the end of stream.
** Tag names are passed through parseTagName.  Returns null when
** the end of stream is reached before a diff, otherwise the result
** of makeDiff.  Throws ParseErr on malformed input.
**
@NoDoc Obj? readDiff()
{
  // skip comment lines
  while (tok == '#') skipLine
  if (tok == eof) return null

  // leading symbol selects the flags handed to makeDiff
  flags := 0
  switch (tok)
  {
    case '^': flags = diffUpdate
    case '+': flags = diffAdd
    case '-': flags = diffRemove
    default:  throw err("Expected diff ^ + - , not ${tokenType(tok)}")
  }
  consumeToken

  // {id:<ref>
  verify('{')
  consumeToken
  idTag := consumeId
  if (idTag != "id") throw err("Expecting id tag, not $idTag")
  verify(':')
  consumeToken
  Ref id := consumeScalar

  // ,mod:<datetime>
  verify(',')
  consumeToken
  modTag := consumeId
  if (modTag != "mod") throw err("Expecting mod tag, not $modTag")
  verify(':')
  consumeToken
  DateTime mod := consumeScalar

  // remaining tags; a tag without a value is a marker
  changes := Str:Obj?[:]
  while (tok == ',')
  {
    consumeToken
    name := parseTagName(consumeId)
    Obj? val := Marker.val
    if (tok == ':')
    {
      consumeToken
      val = consumeScalar
    }
    changes[name] = val
  }

  if (tok != '}') throw err("Expecting } end of diff, not ${tokenType(tok)}")
  consumeToken

  // the last diff of a stream may omit its trailing newline, the
  // same way the grid productions accept end of stream
  if (tok != newline && tok != eof) throw err("Expecting newline, not ${tokenType(tok)}")
  if (tok != eof) consumeToken

  return makeDiff(id, mod, Etc.makeDict(changes), flags)
}
** Hook called by readDiff to construct the diff object from the
** parsed id, mod timestamp, changed tags and flags.  This base
** implementation always throws UnsupportedErr, so subclasses that
** use readDiff must override it.
@NoDoc virtual Obj makeDiff(Ref id, DateTime mod, Dict changes, Int flags)
{
throw UnsupportedErr()
}
//////////////////////////////////////////////////////////////////////////
// Tokenizing
//////////////////////////////////////////////////////////////////////////
**
** Read the next token, store result in `tok` and `val`, and
** return `tok`:
**
**   tok          val
**   ----------   -----
**   symbol       null
**   identifier   Str
**   scalar       the parsed literal value
**   newline      null
**
** For a symbol `tok` is the character itself; at end of stream the
** character is 0, which equals the eof constant.
**
private Int consumeToken()
{
// reset
val = null
// skip whitespace or comments; a newline is itself a token, so it
// is returned immediately and the line counter is advanced
while (cur.isSpace || cur == '/')
{
if (cur == '\n') { ++line; consumeChar; return tok = newline }
if (cur == '/') consumeComment
else consumeChar
}
// handle various starting chars; 'B' and 'C' must be tested before
// the generic isAlpha check so they reach the bin and coord parsers
if (cur == 'B') return consumeBin
if (cur == 'C') return consumeCoor
if (cur.isAlpha) return consumeWord
if (cur == '"') return consumeStr
if (cur == '@') return consumeRef
if (cur.isDigit || (cur == '-' && peek.isDigit)) return consumeNum
// "-I" is routed to the word parser, which knows the -INF keyword
if (cur == '-' && peek == 'I') return consumeWord
if (cur == '`') return consumeUri
// symbol: the token is the character itself
tok = cur; consumeChar
return tok
}
** Throw ParseErr unless the current token matches the expected
** symbol or token constant.
private Void verify(Int expected)
{
  if (tok == expected) return
  throw err("Expecting ${tokenType(expected)}, not ${tokenType(tok)} ($val)")
}
** Throw ParseErr unless the current token is an identifier whose
** value equals the given id.  The token is not consumed.
private Void verifyId(Str id)
{
  isIdentifier := tok == identifier
  if (!isIdentifier) throw err("Expecting identifier, not ${tokenType(tok)}")

  matches := id == val
  if (!matches) throw err("Expecting $id, not $val")
}
** Verify the current token is an identifier, advance to the
** next token and return the identifier's name.
internal Str consumeId()
{
  verify(identifier)
  name := val
  consumeToken
  return name
}
** Verify the current token is a scalar, advance to the next
** token and return the scalar's value (which may be null).
private Obj? consumeScalar()
{
  verify(scalar)
  literal := val
  consumeToken
  return literal
}
** Verify the current token is a newline and advance past it.
private Void consumeNewline()
{
  verify(newline)
  consumeToken
}
** Verify the current token is the given symbol and advance past it.
private Void consumeSymbol(Int symbol)
{
  verify(symbol)
  consumeToken
}
** Tokenize a word.  A word starting with a lowercase letter is an
** identifier; anything else must be one of the keywords T, F, N, M,
** R, NaN, INF or -INF, which yield a scalar.
private Int consumeWord()
{
  // the first char is taken as-is (it may be the '-' of -INF), then
  // any run of alphanumerics and underscores
  buf := StrBuf()
  buf.addChar(cur)
  consumeChar
  while (true)
  {
    if (!cur.isAlphaNum && cur != '_') break
    buf.addChar(cur)
    consumeChar
  }
  word := buf.toStr

  // if lowercase it is an identifier
  if (word[0].isLower)
  {
    val = word
    tok = identifier
    return tok
  }

  // otherwise must be keyword
  switch (word)
  {
    case "T":    val = true
    case "F":    val = false
    case "N":    val = null
    case "M":    val = Marker.val
    case "R":    val = Remove.val
    case "NaN":  val = Number.nan
    case "INF":  val = Number.posInf
    case "-INF": val = Number.negInf
    default:     throw err("Unknown keyword $word")
  }
  tok = scalar
  return tok
}
** Tokenize a Bin literal in either syntax:
**   - 'Bin(text/plain)'
**   - 'Bin mime:"text/plain"', rejected when version is 2 or later
** The mime text runs to the closing ')' or '"', or to end of stream.
private Int consumeBin()
{
  // Bin
  consumeBinChars("Bin")
  mime := Bin.defVal.mime.toStr

  // pick the terminator based on which syntax follows
  terminator := ')'
  if (cur == '(')
  {
    // Bin(text/plain)
    consumeChar
  }
  else
  {
    // Bin mime:"text/plain"
    if (version >= 2) throw Err("Expecting 2.0 Bin(mime) syntax")
    consumeBinChars(" mime:\"")
    terminator = '"'
  }

  // mime text up to the terminator
  buf := StrBuf()
  while (cur != terminator && cur > 0)
  {
    buf.addChar(cur)
    consumeChar
  }
  consumeChar
  mime = buf.toStr

  val = parseBin(mime)
  return tok = scalar
}
** Consume the exact sequence of chars in expected from the stream,
** throwing ParseErr at the first char which does not match.
private Void consumeBinChars(Str expected)
{
  expected.each |Int want|
  {
    if (cur != want) throw err("Expecting Bin '$expected'; '$want.toChar' != '$cur.toChar'")
    consumeChar
  }
}
** Tokenize a coordinate literal: every char from the current one
** through the closing ')' is collected and handed to parseCoord.
** Throws ParseErr if the stream ends before the closing ')'.
private Int consumeCoor()
{
  s := StrBuf()
  while (cur != ')')
  {
    // readChar yields eof forever once the stream is exhausted, so
    // without this guard a truncated literal would never terminate
    if (cur == eof) throw err("Unexpected end of coord")
    s.addChar(cur)
    consumeChar
  }

  // include the closing paren
  s.addChar(cur)
  consumeChar

  val = parseCoord(s.toStr)
  return tok = scalar
}
** Tokenize a literal that starts with a digit or with '-' followed
** by a digit.  The chars are accumulated while counting dashes and
** colons, then the counts decide which kind of scalar is produced:
** old RecId ref, Time, Date, DateTime or Number with optional unit.
private Int consumeNum()
{
// first char is taken unconditionally (a leading '-' is not counted
// as a dash)
s := StrBuf().addChar(cur);
consumeChar
// digits | - | + | _ | : | .
// NOTE(review): exp is assigned but never read within this method
colons := 0; dashes := 0; exp := false
// index in s where the unit suffix begins, or 0 if no unit seen
unitIndex := 0
while (true)
{
if (!cur.isDigit)
{
if (cur == '-') dashes++
else if (cur == ':') colons++
// 'e'/'E' followed by sign or digit is an exponent, not a unit
else if ((cur == 'e' || cur == 'E') && (peek == '-' || peek == '+' || peek.isDigit)) exp = true
// first letter, char above 128, '/', '%' or '$' marks the unit start
else if (cur.isAlpha || cur > 128 || cur == '/' || cur == '%' || cur == '$') { if (unitIndex == 0) unitIndex = s.size }
// '_' before a digit (and before any unit) is a separator which is
// dropped; otherwise it is treated as part of the unit
else if (cur == '_') { if (unitIndex == 0 && peek.isDigit) { consumeChar; continue } else { if (unitIndex == 0) unitIndex = s.size } }
// anything other than '+' or '.' ends the literal
else if (cur != '+' && cur != '.') break
}
s.addChar(cur)
consumeChar
}
// old RecId syntax: 17 chars with a single dash at index 8
if (s.size == 17 && dashes == 1 && s[8] == '-')
{
val = null
try
{
val = Ref.fromRecIdStr(s.toStr)
}
catch {} // fall-thru to Number
if (val != null)
{
if (version >= 2) throw Err("Using old RecId syntax in 2.0 zinc grid: $s")
return tok = scalar
}
}
// Time literal: a leading zero is added for a single digit hour
// and ":00" is appended when there is only one colon
if (dashes == 0 && colons > 0)
{
if (s[1] == ':') s.insert(0, "0")
if (colons == 1) s.add(":00")
val = parseTime(s.toStr)
return tok = scalar
}
// Date literal
if (dashes == 2 && colons == 0)
{
val = parseDate(s.toStr)
return tok = scalar
}
// DateTime
if (dashes >= 2)
{
// xxx timezone: expect a space followed by an uppercase name,
// unless the text ends in 'Z' in which case " UTC" is appended
if (cur != ' ' || !peek.isUpper)
{
if (s[-1] == 'Z') s.add(" UTC")
else throw err("Expecting timezone")
}
else
{
consumeChar; s.addChar(' ')
while (cur.isAlpha || cur == '_') { s.addChar(cur); consumeChar }
// handle GMT+xx or GMT-xx
if ((cur == '+' || cur == '-') && s[-3] == 'G' && s[-2] == 'M' && s[-1] == 'T')
{
s.addChar(cur); consumeChar
while (cur.isDigit) { s.addChar(cur); consumeChar }
}
}
val = parseDateTime(s.toStr)
return tok = scalar
}
// parse Number value
// NOTE(review): unit and float are declared but never read below
Unit? unit := null
Float float := 0f
if (unitIndex == 0)
{
val = parseNumber(s.toStr, null)
}
else
{
// split the text at unitIndex into the numeric part and the unit
str := s.toStr
val = parseNumber(str[0..<unitIndex], str[unitIndex..-1])
}
return tok = scalar
}
** Tokenize a ref literal: '@' followed by id chars, optionally
** followed by a single space and a quoted display string.
private Int consumeRef()
{
  // @id part
  if (cur != '@') throw IOErr("Expecting @ for ref literal")
  consumeChar
  buf := StrBuf()
  while (Ref.isIdChar(cur))
  {
    buf.addChar(cur)
    consumeChar
  }
  id := buf.toStr

  // optional "dis" part
  hasDis := cur == ' ' && peek == '"'
  if (!hasDis)
  {
    val = parseRef(id)
  }
  else
  {
    consumeChar
    // consumeStr leaves the parsed string in val
    consumeStr
    Str? dis := val
    val = dis == null ? parseRef(id) : Ref(id, dis)
  }

  tok = scalar
  return tok
}
** Tokenize a double quoted string literal, decoding backslash
** escapes.  Throws ParseErr if the stream ends before the closing
** quote.
private Int consumeStr()
{
  if (cur != '"') throw IOErr("Expecting \" for str literal")
  consumeChar // opening quote

  buf := StrBuf()
  while (cur != '"')
  {
    if (cur == eof) throw err("Unexpected end of str")
    if (cur == '\\')
    {
      buf.addChar(consumeEscape)
    }
    else
    {
      buf.addChar(cur)
      consumeChar
    }
  }
  consumeChar // closing quote

  val = parseStr(buf.toStr)
  return tok = scalar
}
** Tokenize a backtick quoted uri literal.  A backslash followed by
** one of the chars : / ? # [ ] @ \ & = ; is kept verbatim (both
** chars); any other backslash sequence is decoded by consumeEscape.
** Throws ParseErr on newline or end of stream inside the literal.
private Int consumeUri()
{
  consumeChar // opening backtick

  buf := StrBuf()
  while (cur != '`')
  {
    if (cur == eof || cur == '\n') throw err("Unexpected end of uri")

    if (cur != '\\')
    {
      buf.addChar(cur)
      consumeChar
    }
    else if (":/?#[]@\\&=;".containsChar(peek))
    {
      // keep the backslash and the char it escapes as-is
      buf.addChar(cur)
      buf.addChar(peek)
      consumeChar
      consumeChar
    }
    else
    {
      buf.addChar(consumeEscape)
    }
  }
  consumeChar // closing backtick

  val = parseUri(buf.toStr)
  return tok = scalar
}
** Skip a '//' comment up to, but not including, the newline or
** end of stream.  Throws ParseErr if the second slash is missing.
private Void consumeComment()
{
  if (peek != '/') throw err("Expecting comment")

  // both slashes
  consumeChar
  consumeChar

  while (true)
  {
    if (cur == '\n' || cur == eof) break
    consumeChar
  }
}
//////////////////////////////////////////////////////////////////////////
// Parse/Intern Hooks
//////////////////////////////////////////////////////////////////////////
// Each hook below converts the raw text of a literal into its value.
// They are virtual so that a subclass can substitute its own
// construction of the values.

** Hook for tag names read by readDiff; returns the name unchanged.
@NoDoc virtual Str parseTagName(Str s) { s }
** Hook for numbers: float is the numeric text and unit the unit
** text, or null when the literal has no unit.
@NoDoc virtual Number parseNumber(Str float, Str? unit) { Number(Float(float), unit == null ? null : Number.loadUnit(unit)) }
** Hook for refs which have no display string.
@NoDoc virtual Ref parseRef(Str s) { Ref(s) }
** Hook for string literals; returns the string unchanged.
@NoDoc virtual Str parseStr(Str s) { s }
** Hook for uri literals.
@NoDoc virtual Uri parseUri(Str s) { Uri.fromStr(s) }
** Hook for date literals.
@NoDoc virtual Date parseDate(Str s) { Date.fromStr(s) }
** Hook for time literals.
@NoDoc virtual Time parseTime(Str s) { Time.fromStr(s) }
** Hook for date time literals.
@NoDoc virtual DateTime parseDateTime(Str s) { DateTime.fromStr(s) }
** Hook for bin literals; mime is the text of the mime type.
@NoDoc virtual Bin parseBin(Str mime) { Bin(mime) }
** Hook for coordinate literals.
@NoDoc virtual Coord parseCoord(Str s) { Coord.fromStr(s) }
//////////////////////////////////////////////////////////////////////////
// Char Reads
//////////////////////////////////////////////////////////////////////////
** Decode one backslash escape starting at the backslash and
** return the char it denotes.  Supports the single char escapes
** b f n r t " $ ' ` \ and the four hex digit form uxxxx.
** Throws ParseErr for anything else.
private Int consumeEscape()
{
  // consume slash
  consumeChar

  // uxxxx: all four chars are consumed before the digits are checked
  if (cur == 'u')
  {
    consumeChar
    code := 0
    valid := true
    for (i := 0; i < 4; ++i)
    {
      digit := cur.fromDigit(16)
      consumeChar
      if (digit == null) valid = false
      else code = code.shiftl(4).or(digit)
    }
    if (!valid) throw err("Invalid hex value for \\uxxxx")
    return code
  }

  // single char escapes
  Int? decoded := null
  switch (cur)
  {
    case 'b':  decoded = '\b'
    case 'f':  decoded = '\f'
    case 'n':  decoded = '\n'
    case 'r':  decoded = '\r'
    case 't':  decoded = '\t'
    case '"':  decoded = '"'
    case '$':  decoded = '$'
    case '\'': decoded = '\''
    case '`':  decoded = '`'
    case '\\': decoded = '\\'
  }
  if (decoded == null) throw err("Invalid escape sequence")
  consumeChar
  return decoded
}
** Advance the lookahead by one char: peek becomes the current
** char and a new peek is read from the stream.
private Void consumeChar()
{
  this.cur  = this.peek
  this.peek = readChar
}
** Read the next char from the stream, mapping the null which
** the stream returns at its end to 0.
private Int readChar()
{
  return in.readChar ?: 0
}
** Discard the remainder of the current line, then prime the
** lookahead and the first token of the next line.
**
** When this is called cur and peek already hold the next two chars
** of the line, so they must be checked for the newline before any
** more is pulled from the stream; otherwise a line whose newline is
** already in the lookahead would cause the following line to be
** discarded too.
private Void skipLine()
{
  if (cur == '\n' || cur == eof)
  {
    // line ended at cur: peek is already the first char of the next line
    consumeChar
  }
  else
  {
    // unless the line ends at peek, drain the stream through the newline
    if (peek != '\n' && peek != eof)
    {
      while (true)
      {
        ch := in.readChar
        if (ch == '\n' || ch == null) break
      }
    }
    this.cur = readChar
    this.peek = readChar
  }
  ++line
  consumeToken
}
** Create (but do not throw) a ParseErr whose message is suffixed
** with the current line number.
private ParseErr err(Str msg)
{
  return ParseErr("$msg [Line $line]")
}
//////////////////////////////////////////////////////////////////////////
// TokenTypes
//////////////////////////////////////////////////////////////////////////
** Map a token to the text used for it in error messages: the
** name of the token constant, or for a symbol the char itself.
** End of stream is reported as "eof" since its char value of 0
** is not printable.
private Str tokenType(Int ch)
{
  switch (ch)
  {
    case identifier: return "identifier"
    case scalar:     return "scalar"
    case newline:    return "newline"
    case eof:        return "eof"
    default:         return ch.toChar
  }
}
//////////////////////////////////////////////////////////////////////////
// Fields
//////////////////////////////////////////////////////////////////////////
// Token constants stored in tok.  A symbol token is stored as its
// own char, so these share the Int space with symbol chars.
// eof is 0, the value readChar returns at end of stream
private const static Int eof := 0
private const static Int identifier := 'I'
private const static Int scalar := 'V'
private const static Int newline := 'N'
internal Int tok // symbol value or token constant such as identifier
internal Obj? val // value of literal or identifier
// stream being read
private InStream in
private Int version := 0 // 0 unknown, 1 or 2
// current char and the one after it (two char lookahead)
internal Int cur
private Int peek
// current line number, starting at 1; used in error messages
@NoDoc Int line := 1 { private set }
}