class JMESPath::Lexer

@api private

Constants

NUMBERS
SIMPLE_TOKENS
STATE_EQ
STATE_GT
STATE_IDENTIFIER
STATE_JSON_LITERAL
STATE_LBRACKET
STATE_LT
STATE_NOT
STATE_NUMBER
STATE_PIPE
STATE_QUOTED_STRING
STATE_SINGLE_CHAR
STATE_STRING_LITERAL
STATE_WHITESPACE
TRANSLATION_TABLE
T_COLON
T_COMMA
T_COMPARATOR
T_CURRENT
T_DOT
T_EOF
T_EXPREF
T_FILTER
T_FLATTEN
T_IDENTIFIER
T_LBRACE
T_LBRACKET
T_LITERAL
T_LPAREN
T_NUMBER
T_OR
T_PIPE
T_QUOTED_IDENTIFIER
T_RBRACE
T_RBRACKET
T_RPAREN
T_STAR
T_UNKNOWN
VALID_IDENTIFIERS

Public Instance Methods

tokenize(expression) click to toggle source

@param [String<JMESPath>] expression
@return [Array<Token>]

# File lib/jmespath/lexer.rb, line 161
# Converts a JMESPath expression string into a list of tokens.
#
# Each character is looked up in TRANSLATION_TABLE to choose how the input at
# that point is scanned; characters without an entry become T_UNKNOWN tokens.
# The returned list always ends with a T_EOF token whose position is the
# stream position after the last character.
#
# @param [String<JMESPath>] expression
# @return [Array<Token>]
def tokenize(expression)
  stream = CharacterStream.new(expression.chars.to_a)
  result = []

  while stream.current
    state = TRANSLATION_TABLE[stream.current]
    case state
    when nil
      # no scanning state is registered for this character
      result << Token.new(T_UNKNOWN, stream.current, stream.position)
      stream.next
    when STATE_SINGLE_CHAR
      # one-character tokens such as ".", ",", "@"
      result << Token.new(SIMPLE_TOKENS[stream.current], stream.current, stream.position)
      stream.next
    when STATE_IDENTIFIER
      # the first character is always taken; keep going while the following
      # characters are valid identifier characters
      origin = stream.position
      pieces = []
      loop do
        pieces << stream.current
        stream.next
        break unless VALID_IDENTIFIERS.include?(stream.current)
      end
      result << Token.new(T_IDENTIFIER, pieces.join, origin)
    when STATE_WHITESPACE
      # whitespace produces no token
      stream.next
    when STATE_LBRACKET
      # "[" may open "[]" (flatten), "[?" (filter) or stand alone
      origin = stream.position
      case stream.next
      when ']'
        stream.next
        result << Token.new(T_FLATTEN, '[]', origin)
      when '?'
        stream.next
        result << Token.new(T_FILTER, '[?', origin)
      else
        result << Token.new(T_LBRACKET, '[',  origin)
      end
    when STATE_STRING_LITERAL
      # raw string literal delimited by single quotes
      result << inside(stream, "'", T_LITERAL)
    when STATE_PIPE
      # "||" is OR, a lone "|" is a pipe
      result << match_or(stream, '|', '|', T_OR, T_PIPE)
    when STATE_JSON_LITERAL
      # backtick-delimited JSON literal; unescape backticks before parsing
      literal = inside(stream, '`', T_LITERAL)
      if literal.type == T_LITERAL
        literal.value = literal.value.gsub('\`', '`')
        literal = parse_json(literal)
      end
      result << literal
    when STATE_NUMBER
      # the first character is always taken; further characters are taken
      # while they are listed in NUMBERS
      origin = stream.position
      pieces = []
      loop do
        pieces << stream.current
        stream.next
        break unless NUMBERS.include?(stream.current)
      end
      result << Token.new(T_NUMBER, pieces.join.to_i, origin)
    when STATE_QUOTED_STRING
      # double-quoted identifier; re-wrap in quotes so it parses as a JSON string
      quoted = inside(stream, '"', T_QUOTED_IDENTIFIER)
      if quoted.type == T_QUOTED_IDENTIFIER
        quoted.value = "\"#{quoted.value}\""
        quoted = parse_json(quoted)
      end
      result << quoted
    when STATE_EQ
      # "==" is a comparator; a lone "=" is not a valid token
      result << match_or(stream, '=', '=', T_COMPARATOR, T_UNKNOWN)
    when STATE_NOT
      # "!=" is a comparator; a lone "!" is not a valid token
      result << match_or(stream, '!', '=', T_COMPARATOR, T_UNKNOWN)
    else
      # remaining states are '<' or '>': comparator with or without a trailing "="
      result << match_or(stream, stream.current, '=', T_COMPARATOR, T_COMPARATOR)
    end
  end

  result << Token.new(T_EOF, nil, stream.position)
  result
end

Private Instance Methods

inside(chars, delim, type) click to toggle source
# File lib/jmespath/lexer.rb, line 272
# Consumes a delimited section of the stream and returns it as one token.
#
# On entry the stream's current character is the opening delimiter. Characters
# are collected until +delim+ is seen again; the closing delimiter is consumed
# but not included in the token value. A backslash and the character after it
# are both copied into the value as-is, so an escaped delimiter does not end
# the section (unescaping is left to the caller).
#
# If the stream runs out before the closing delimiter, a T_UNKNOWN token
# holding whatever was collected so far is returned instead.
#
# @param chars the character stream; this code relies on +chars.next+
#   returning the new current character, or nil once the input is exhausted
# @param [String] delim the one-character closing delimiter
# @param type the token type to use when the section is properly closed
# @return [Token] positioned at the opening delimiter
def inside(chars, delim, type)
  position = chars.position
  current = chars.next
  buffer = []
  while current != delim
    # '\\' is a single backslash character
    if current == '\\'
      # keep the backslash and step to the escaped character so that an
      # escaped delimiter is appended below rather than ending the loop
      buffer << current
      current = chars.next
    end
    if current.nil?
      # unclosed delimiter
      return Token.new(T_UNKNOWN, buffer.join, position)
    end
    buffer << current
    current = chars.next
  end
  # step past the closing delimiter
  chars.next
  Token.new(type, buffer.join, position)
end
match_or(chars, current, expected, type, or_type) click to toggle source
# File lib/jmespath/lexer.rb, line 263
# Builds either a two-character token or a one-character fallback token.
#
# On entry the stream's current character is +current+. If the character that
# follows equals +expected+, both are consumed and a token of +type+ with the
# value +current + expected+ is returned. Otherwise only +current+ is consumed
# and a token of +or_type+ with the value +current+ is returned.
#
# The token position is always the index of +current+, i.e. where the token
# starts. It is captured before the stream is advanced so that the
# two-character case does not report the index of its second character.
#
# @param chars the character stream, positioned on +current+
# @param [String] current the first character of the token
# @param [String] expected the character that completes the two-character form
# @param type token type for the two-character form
# @param or_type token type for the one-character form
# @return [Token]
def match_or(chars, current, expected, type, or_type)
  position = chars.position
  if chars.next == expected
    chars.next
    Token.new(type, current + expected, position)
  else
    Token.new(or_type, current, position)
  end
end
parse_json(token) click to toggle source
# File lib/jmespath/lexer.rb, line 292
# Replaces the token's string value with the result of decoding it as JSON.
#
# When decoding raises JSON::ParserError the value is left untouched and the
# token's type is changed to T_UNKNOWN instead. The same token object is
# returned in both cases.
#
# NOTE(review): JSON.load is applied to text taken from the expression being
# tokenized. JSON.load is more permissive than JSON.parse and is documented as
# intended for trusted input only; if expressions can come from untrusted
# sources, consider switching to JSON.parse after checking how blank and
# non-standard values should be treated.
#
# @param [Token] token a token whose value is JSON text
# @return [Token] the token passed in
def parse_json(token)
  token.value = JSON.load(token.value)
  token
rescue JSON::ParserError
  token.type = T_UNKNOWN
  token
end