package query import ( "fmt" "strings" "unicode" "unicode/utf8" ) // itemType identifies the type of lex items. type itemType int const eof = -1 const ( itemNull itemType = iota itemError // error occurred; // value is text of error itemEOF itemLeftParen // ( itemRightParen // ) itemOr // | itemAnd // , itemNot // ! itemLt // << itemLtEq // <=, < itemGt // >> itemGtEq // >=, > itemEq // = itemPatMatch // % itemRegexp // ~ itemLeftCurly // { itemRightCurly // } itemString ) // item represents a token returned from the scanner. type item struct { typ itemType // Type, such as itemNumber. val string // Value, such as "23.2". } func (i item) String() string { if i.typ == itemString { return fmt.Sprintf("%#v", i.val) } if i.typ == itemEOF { return "" } if i.typ == itemError { return fmt.Sprintf("error: %s", i.val) } if i.typ == itemNull { return "" } return i.val } // stateFn represents the state of the scanner // as a function that returns the next state. type stateFn func(*lexer) stateFn // lexer holds the state of the scanner. type lexer struct { name string // used only for error reports. input string // the string being scanned. start int // start position of this item. pos int // current position in the input. width int // width of last rune read from input. items chan item // channel of scanned items. last item } func lex(name, input string) (*lexer, chan item) { l := &lexer{ name: name, input: input, items: make(chan item), } go l.run() // Concurrently run state machine. return l, l.items } // emit passes an item back to the client. func (l *lexer) emit(t itemType) { l.items <- item{t, l.input[l.start:l.pos]} l.start = l.pos } // run lexes the input by executing state functions until // the state is nil. func (l *lexer) run() { for state := lexMain; state != nil; { state = state(l) } close(l.items) // No more tokens will be delivered. } // next returns the next rune in the input. func (l *lexer) next() (r rune) { if l.pos >= len(l.input) { l.width = 0 return eof } r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) l.pos += l.width return r } // ignore skips over the pending input before this point. func (l *lexer) ignore() { l.start = l.pos } // backup steps back one rune. // Can be called only once per call of next. func (l *lexer) backup() { l.pos -= l.width } func (l *lexer) Current() item { if l.last.typ == 0 { l.last = <-l.items } return l.last } func (l *lexer) Consume() { l.last = <-l.items } // error returns an error token and terminates the scan // by passing back a nil pointer that will be the next // state, terminating l.run. func (l *lexer) errorf(format string, args ...interface{}) stateFn { l.items <- item{ itemError, fmt.Sprintf(format, args...), } return nil } func lexMain(l *lexer) stateFn { switch r := l.next(); { case r == eof: l.emit(itemEOF) return nil case unicode.IsSpace(r): l.ignore() case r == '(': l.emit(itemLeftParen) case r == ')': l.emit(itemRightParen) case r == '{': l.emit(itemLeftCurly) case r == '}': l.emit(itemRightCurly) case r == '|': l.emit(itemOr) case r == ',': l.emit(itemAnd) case r == '!': l.emit(itemNot) case r == '<': r2 := l.next() if r2 == '<' { l.emit(itemLt) } else if r2 == '=' { l.emit(itemLtEq) } else { l.backup() l.emit(itemLtEq) } case r == '>': r2 := l.next() if r2 == '>' { l.emit(itemGt) } else if r2 == '=' { l.emit(itemGtEq) } else { l.backup() l.emit(itemGtEq) } case r == '=': l.emit(itemEq) case r == '%': l.emit(itemPatMatch) case r == '~': l.emit(itemRegexp) default: l.backup() return lexString } return lexMain } func lexString(l *lexer) stateFn { r := l.next() // quoted string if r == '"' || r == '\'' { quote := r result := "" l.ignore() for { r = l.next() if r == quote { l.ignore() l.items <- item{itemString, result} return lexMain } if r == '\\' { r = l.next() } if r == eof { return l.errorf("unexpected eof in quoted string") } result = result + string(r) } } else { // unquoted string for { if unicode.IsSpace(r) || strings.IndexRune("()|,!{}", r) > 0 { l.backup() l.emit(itemString) return lexMain } if r == eof { l.emit(itemString) l.emit(itemEOF) return nil } r = l.next() } } }