手寫 JavaScript JSON 解析器

qiushijie發表於2019-04-02

JSON 是一種通用的資料格式,語法較為簡單,因此動手為它寫一個解析器,是入門編譯原理的一個非常好的方法。

詞法分析

將輸入的字串切割成一個一個的token

/**
 * Token kinds produced by the lexer and consumed by the parser.
 *
 * For punctuation and keyword tokens the value doubles as the token's literal
 * text; STRING and NUMBER tokens carry their own text separately.
 * The object is frozen so that a stray assignment cannot silently change a
 * token kind for every consumer.
 */
const TokenTypes = Object.freeze({
  OPEN_OBJECT: '{',
  CLOSE_OBJECT: '}',
  OPEN_ARRAY: '[',
  CLOSE_ARRAY: ']',
  KEY: 'key',
  STRING: 'string',
  NUMBER: 'number',
  TRUE: 'true',
  FALSE: 'false',
  NULL: 'null',
  COLON: ':',
  COMMA: ',',
  EOF: 'eof',
});

/**
 * A single lexical unit: its kind plus the text it carries.
 */
class Token {
  /**
   * @param {string} type - The token kind.
   * @param {string} [string] - The token's text. When omitted (or null) the
   *   type itself is used as the text, which suits punctuation and keywords.
   */
  constructor(type, string) {
    this.type = type;
    // Fall back only when no text was supplied: an empty string is a valid
    // token text (the JSON literal "") and must not be replaced by the type.
    this.string = string == null ? type : string;
  }
}

/**
 * Splits a JSON text into tokens, one per call to `nextToken()`.
 *
 * Recognised input: the structural characters `{ } [ ] : ,`, string literals
 * (including the JSON escape sequences), integer literals with an optional
 * leading minus, and the words `true`, `false` and `null`. Whitespace between
 * tokens (space, tab, line feed, carriage return) is skipped.
 *
 * Fractions and exponents are not tokenized; the character following the
 * integer part (for example `.`) is reported as "not supported".
 */
class Lexer {
  /**
   * @param {string} json - The JSON text to tokenize.
   */
  constructor(json) {
    this._json = json;
    this._index = 0;
  }

  /**
   * @returns {boolean} True when every character has been consumed.
   */
  _isEnd() {
    return this._index >= this._json.length;
  }

  /**
   * Consumes the current character.
   * @returns {string|undefined} The consumed character, or undefined at the end.
   */
  _walk() {
    return this._json[this._index++];
  }

  /**
   * @returns {string|undefined} The current character, without consuming it.
   */
  _currentChar() {
    return this._json[this._index];
  }

  /**
   * Advances past any run of whitespace characters.
   */
  _skipWhitespace() {
    while (!this._isEnd() && ' \t\n\r'.includes(this._currentChar())) {
      this._index++;
    }
  }

  /**
   * Reads the remainder of a string literal; the opening quote has already
   * been consumed by the caller.
   * @returns {Token} A STRING token holding the decoded text.
   * @throws {Error} If the input ends before the closing quote.
   */
  _readString() {
    let text = '';
    while (!this._isEnd()) {
      const c = this._walk();
      if (c === '"') {
        return new Token(TokenTypes.STRING, text);
      }
      text += c === '\\' ? this._readEscape() : c;
    }
    throw new Error('unterminated string');
  }

  /**
   * Decodes one escape sequence; the backslash has already been consumed.
   * @returns {string} The character the escape sequence stands for.
   * @throws {Error} On an unknown escape, a malformed \u escape, or end of input.
   */
  _readEscape() {
    if (this._isEnd()) {
      throw new Error('unterminated string');
    }
    const c = this._walk();
    switch (c) {
      case '"':
      case '\\':
      case '/':
        return c;
      case 'b':
        return '\b';
      case 'f':
        return '\f';
      case 'n':
        return '\n';
      case 'r':
        return '\r';
      case 't':
        return '\t';
      case 'u': {
        const hex = this._json.slice(this._index, this._index + 4);
        if (!/^[0-9a-fA-F]{4}$/.test(hex)) {
          throw new Error(`invalid unicode escape \\u${hex}`);
        }
        this._index += 4;
        return String.fromCharCode(Number.parseInt(hex, 16));
      }
      default:
        throw new Error(`invalid escape \\${c}`);
    }
  }

  /**
   * @param {string|undefined} c - A single character (or undefined at end of input).
   * @returns {boolean} True for an ASCII letter.
   */
  _isLetter(c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
  }

  /**
   * @param {string|undefined} c - A single character (or undefined at end of input).
   * @returns {boolean} True for an ASCII decimal digit.
   */
  _isDigit(c) {
    return c >= '0' && c <= '9';
  }

  /**
   * Reads the remaining digits of an integer literal.
   * @param {string} c - The already-consumed start of the literal.
   * @returns {Token} A NUMBER token holding the literal's text.
   */
  _readDigit(c) {
    let text = c;
    while (this._isDigit(this._currentChar())) {
      text += this._walk();
    }
    return new Token(TokenTypes.NUMBER, text);
  }

  /**
   * Reads the remaining letters of a bare word.
   * @param {string} c - The already-consumed first letter.
   * @returns {string} The complete word.
   */
  _readWord(c) {
    let word = c;
    while (this._isLetter(this._currentChar())) {
      word += this._walk();
    }
    return word;
  }

  /**
   * Produces the next token. Once the input is exhausted every call returns
   * an EOF token.
   * @returns {Token} The next token.
   * @throws {Error} On an unsupported character, an unknown word, or a
   *   malformed string literal.
   */
  nextToken() {
    // Iterative skip: one recursive call per blank would overflow the stack
    // on long runs of whitespace.
    this._skipWhitespace();
    if (this._isEnd()) {
      return new Token(TokenTypes.EOF);
    }
    const c = this._walk();
    switch (c) {
      case '{':
        return new Token(TokenTypes.OPEN_OBJECT);
      case '}':
        return new Token(TokenTypes.CLOSE_OBJECT);
      case '[':
        return new Token(TokenTypes.OPEN_ARRAY);
      case ']':
        return new Token(TokenTypes.CLOSE_ARRAY);
      case ':':
        return new Token(TokenTypes.COLON);
      case ',':
        return new Token(TokenTypes.COMMA);
      case '"':
        return this._readString();
    }
    if (this._isDigit(c)) {
      return this._readDigit(c);
    }
    // A minus sign is only meaningful directly in front of a digit.
    if (c === '-' && this._isDigit(this._currentChar())) {
      return this._readDigit(c);
    }
    if (this._isLetter(c)) {
      const word = this._readWord(c);
      switch (word) {
        case 'true':
          return new Token(TokenTypes.TRUE);
        case 'false':
          return new Token(TokenTypes.FALSE);
        case 'null':
          return new Token(TokenTypes.NULL);
      }
      throw new Error(`expect true, false, null actual ${word}`);
    }
    throw new Error(`not supported ${c}`);
  }
}
複製程式碼

解析器

通過匹配到不同token跳轉到不同狀態,一步一步完成解析

/**
 * Recursive-descent JSON parser driven by the tokens of a Lexer.
 *
 * The parser always holds one token of lookahead in `this._token`; each
 * `_visit*` method inspects it to decide which production to follow.
 */
class Parser {
  /**
   * @param {string} json - The JSON text to parse.
   */
  constructor(json) {
    this._lexer = new Lexer(json);
    this._token = this._lexer.nextToken();
  }

  /**
   * Requires the current token to be of the given type and consumes it.
   * @param {string} type - The expected token type.
   * @returns {string} The text of the consumed token.
   * @throws {Error} If the current token has a different type.
   */
  _matchToken(type) {
    if (!this._isToken(type)) {
      throw new Error(`expect ${type} actual ${this._token.type}`);
    }
    const string = this._token.string;
    this._walk();
    return string;
  }

  /**
   * @param {string} type - A token type.
   * @returns {boolean} True when the current token has that type.
   */
  _isToken(type) {
    return this._token.type === type;
  }

  /**
   * @returns {string} The text of the current token.
   */
  _currentString() {
    return this._token.string;
  }

  /**
   * Advances to the next token.
   */
  _walk() {
    this._token = this._lexer.nextToken();
  }

  /**
   * Parses the whole input as a single JSON value.
   * @returns {*} The parsed value.
   * @throws {Error} If the input is not a value, or anything follows the value.
   */
  parse() {
    const value = this._visitValue();
    // Anything left after the top-level value is a syntax error.
    this._matchToken(TokenTypes.EOF);
    return value;
  }

  /**
   * Parses one value starting at the current token.
   * @returns {*} The parsed value.
   * @throws {Error} If the current token cannot start a value.
   */
  _visitValue() {
    const { type, string } = this._token;
    switch (type) {
      case TokenTypes.NUMBER:
        this._walk();
        return Number(string);
      case TokenTypes.STRING:
        this._walk();
        return string;
      case TokenTypes.TRUE:
        this._walk();
        return true;
      case TokenTypes.FALSE:
        this._walk();
        return false;
      case TokenTypes.NULL:
        this._walk();
        return null;
      case TokenTypes.OPEN_OBJECT: {
        this._walk();
        const object = this._visitObject();
        this._matchToken(TokenTypes.CLOSE_OBJECT);
        return object;
      }
      case TokenTypes.OPEN_ARRAY: {
        this._walk();
        const array = this._visitArray();
        this._matchToken(TokenTypes.CLOSE_ARRAY);
        return array;
      }
      default:
        // Failing here keeps `[1,]` or a bare `,` from yielding undefined.
        throw new Error(`expect value actual ${type}`);
    }
  }

  /**
   * Parses the members of an object; the opening brace has been consumed and
   * the closing brace is left for the caller.
   * @returns {Object} The parsed object.
   * @throws {Error} On a malformed member.
   */
  _visitObject() {
    const object = {};
    // Empty object: no members to read.
    if (this._isToken(TokenTypes.CLOSE_OBJECT)) {
      return object;
    }
    while (true) {
      const key = this._matchToken(TokenTypes.STRING);
      this._matchToken(TokenTypes.COLON);
      const value = this._visitValue();
      // Define the property instead of assigning it, so that the key
      // "__proto__" becomes an ordinary own property rather than replacing
      // the object's prototype.
      Object.defineProperty(object, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true,
      });
      if (!this._isToken(TokenTypes.COMMA)) {
        break;
      }
      this._walk();
    }
    return object;
  }

  /**
   * Parses the elements of an array; the opening bracket has been consumed
   * and the closing bracket is left for the caller.
   * @returns {Array} The parsed array.
   * @throws {Error} On a malformed element.
   */
  _visitArray() {
    const array = [];
    // Empty array: no elements to read.
    if (this._isToken(TokenTypes.CLOSE_ARRAY)) {
      return array;
    }
    while (true) {
      array.push(this._visitValue());
      if (!this._isToken(TokenTypes.COMMA)) {
        break;
      }
      this._walk();
    }
    return array;
  }
}
複製程式碼
// Round-trip check: serialize a sample value with the built-in JSON.stringify,
// then feed the resulting text back through the hand-written Parser.
const array = [{ t: true, f: false, n: null, num: 12, s: 'str' }];
const json = JSON.stringify(array);
console.log(json); // [{"t":true,"f":false,"n":null,"num":12,"s":"str"}]
const result = new Parser(json).parse();
console.log(result); // [ { t: true, f: false, n: null, num: 12, s: 'str' } ]
複製程式碼

使用

// Usage: parse a JSON text that contains each supported kind of value and
// print the resulting JavaScript value.
const json = `[{"f": false, "t": true, "s": "string", "n": null, "num": 12}]`;
console.log(new Parser(json).parse());
複製程式碼

最終列印

[
  {
    "f": false,
    "t": true,
    "s": "string",
    "n": null,
    "num": 12
  }
]
複製程式碼

github gist 完整程式碼

相關文章