參考用C++編寫一個簡易的JSON解析器(1) 寫一個動態型別 - 知乎
歡迎測試和反饋bug
首先,json包含string,number,integer,object,array,bool,null這些型別
對於object對映,使用map,對於array使用vector
我們定義一個類Val用來儲存這些值,內部使用std::variant來儲存具體的值(參考 cppreference.com 的 std::variant 條目)
然後寫好對應的初始化
// Dynamically typed JSON value: at any time it holds exactly one of
// null, int, bool, double, string, list or dict.
class Val
{
public:
    using List = std::vector<Val>;
    using Dict = std::map<std::string, Val>;
    using val = std::variant<
        std::nullptr_t,
        int,
        bool,
        double,
        std::string,
        List,
        Dict>;

    // --- null ---
    Val() : value_(nullptr) {}
    Val(std::nullptr_t) : value_(nullptr) {}

    // --- scalars ---
    Val(bool flag) : value_(flag) {}
    Val(int number) : value_(number) {}
    Val(double number) : value_(number) {}

    // --- strings ---
    // The const char* overload keeps string literals from converting to bool.
    Val(const char* text) : value_(std::string(text)) {}
    Val(const std::string& text) : value_(text) {}

    // --- containers ---
    Val(const List& items) : value_(items) {}
    Val(const Dict& members) : value_(members) {}
    Val(std::initializer_list<Val> items) : value_(List(items)) {}

private:
    val value_;
};
接著為了方便輸出數值,可以過載運算子
//記得在Val裡 friend std::ostream& operator<<(std::ostream& out, const Val& v);
std::ostream& operator<<(std::ostream& out, const Val& v)
{
if (std::holds_alternative<std::nullptr_t>(v.value_))
out << "null";
else if (std::holds_alternative<bool>(v.value_))
out << (std::get<bool>(v.value_) ? "true" : "false");
else if (std::holds_alternative<int>(v.value_))
out << std::get<int>(v.value_);
else if (std::holds_alternative<double>(v.value_))
out << std::get<double>(v.value_);
else if (std::holds_alternative<std::string>(v.value_))
out << "\"" << std::get<std::string>(v.value_) << "\"";
else if (std::holds_alternative<Val::List>(v.value_))
{
out << "[";
const auto& list = std::get<Val::List>(v.value_);
for (size_t i = 0; i < list.size(); ++i) {
if (i > 0) out << ", ";
out << list[i];
}
out << "]";
}
else if (std::holds_alternative<Val::Dict>(v.value_))
{
out << "{";
const auto& dict = std::get<Val::Dict>(v.value_);
for (auto it = dict.begin(); it != dict.end(); ++it)
{
if (it != dict.begin()) out << ", ";
out << "\"" << it->first << "\": " << it->second;
}
out << "}";
}
return out;
}
測試下
int main()
{
Val v1 = nullptr;
Val v2 = true;
Val v3 = 3.14;
Val v4 = "Hello";
Val::List list = { v1, v2, v3, v4 };
Val v5 = list;
Val::Dict dict = { {"key1", v1}, {"key2", v2}, {"key3", v3}, {"key4", v4}, {"key5", v5} };
Val v6 = dict;
std::cout << v1 << std::endl;
std::cout << v2 << std::endl;
std::cout << v3 << std::endl;
std::cout << v4 << std::endl;
std::cout << v5 << std::endl;
std::cout << v6 << std::endl;
return 0;
}
輸出
null
true
3.14
"Hello"
[null, true, 3.14, "Hello"]
{"key1": null, "key2": true, "key3": 3.14, "key4": "Hello", "key5": [null, true, 3.14, "Hello"]}
為了方便使用,我們再過載[]
// Indexes into this value with a dynamically typed key.
//  - dict: `key` must hold a string; a missing entry is inserted as null.
//  - list: `key` must hold an int that is a valid position in the list.
//  - null: this value first turns into a dict (string key) or into a list
//          padded with nulls up to the requested position (int key).
// Throws std::bad_variant_access when the key type does not fit the
// container, std::out_of_range for an invalid list position, and
// std::runtime_error when this value is neither a container nor null.
Val& operator[](const Val& key)
{
    if (std::holds_alternative<Val::Dict>(value_))
    {
        return std::get<Dict>(value_)[std::get<std::string>(key.value_)];
    }
    if (std::holds_alternative<Val::List>(value_))
    {
        List& items = std::get<List>(value_);
        const int index = std::get<int>(key.value_);
        // vector::operator[] is unchecked, so reject bad positions here.
        if (index < 0 || static_cast<std::size_t>(index) >= items.size())
            throw std::out_of_range("List index out of range");
        return items[static_cast<std::size_t>(index)];
    }
    if (std::holds_alternative<std::nullptr_t>(value_))
    {
        if (std::holds_alternative<std::string>(key.value_))
        {
            const std::string& name = std::get<std::string>(key.value_);
            value_ = Dict();
            return std::get<Dict>(value_)[name];  // inserts name -> null
        }
        if (std::holds_alternative<int>(key.value_))
        {
            const int index = std::get<int>(key.value_);
            if (index < 0)
                throw std::out_of_range("List index out of range");
            // Create a list just long enough, filled with nulls.
            value_ = List(static_cast<std::size_t>(index) + 1, Val());
            return std::get<List>(value_)[static_cast<std::size_t>(index)];
        }
    }
    throw std::runtime_error("Not a dict or a list");
}

// Integer positions get their own overload: the literal 0 is also a null
// pointer constant, so without this `v[0]` would pick operator[](const char*).
Val& operator[](int index)
{
    return (*this)[Val(index)];
}
// Looks `key` up in a dict, inserting a null entry when it is missing.
// A null value is first turned into a dict containing just that key.
// Throws std::runtime_error for every other kind of value.
Val& operator[](const char* key)
{
    const bool isDict = std::holds_alternative<Val::Dict>(value_);
    if (!isDict)
    {
        if (!std::holds_alternative<std::nullptr_t>(value_))
            throw std::runtime_error("Not a dictionary");
        value_ = Dict({ {key, nullptr} });
    }
    return std::get<Dict>(value_)[key];
}
這樣可以使用[]來輸出內容
int main()
{
Val v1 = {1,2,3,"haha",3.14};
std::cout<<v1[2]<<' '<<v1[3]<<"\n";
v1[2] = "hello world";
std::cout<<v1<<"\n";
Val::Dict v2;
v2["nihao"] = 1;
std::cout<<v2;
return 0;
}
輸出
3 "haha"
[1, 2, "hello world", "haha", 3.14]
{"nihao": 1}
接下來我們新增兩個函式:add用於向list追加新的數值,put用於新增新的對映。
// Appends `v`, treating this value as a list:
//  - list  : `v` is pushed onto the end;
//  - null  : this becomes a one-element list holding `v`;
//  - other : this becomes a two-element list of the old value followed by `v`.
void add(Val v)
{
    if (List* items = std::get_if<List>(&value_))
    {
        items->push_back(v);
        return;
    }
    if (std::holds_alternative<std::nullptr_t>(value_))
    {
        value_ = List({ v });
        return;
    }
    List wrapped = { *this, v };
    value_ = wrapped;
}
void put(const std::string& key, Val value)
{
if (std::holds_alternative<Val::Dict>(value_))
{
std::get<Dict>(value_)[key] = value;
}
else if (std::holds_alternative<std::nullptr_t>(value_))
{
value_ = Dict({ {key, value} });
}
else
throw std::runtime_error("Not a dictionary");
}
進行測試
int main()
{
Val x;
x.add("are u ok?");
x.add("hello");
x.add("3Q");
Val y;
y.put("phone",998244353);
std::cout<<x<<"\n"<<y;
return 0;
}
結果
["are u ok?", "hello", "3Q"]
{"phone": 998244353}
然後,為了方便直接獲取Val中儲存的值,我們再寫一些型別轉換運算子
operator std::nullptr_t() const { return std::get<std::nullptr_t>(value_); }
operator bool() const { return std::get<bool>(value_); }
operator int() const { return std::get<int>(value_); }
operator double() const { return std::get<double>(value_); }
operator std::string() const { return std::get<std::string>(value_); }
operator List() const { return std::get<List>(value_); }
operator Dict() const { return std::get<Dict>(value_); }
測試
int main()
{
Val x = "asdfg";
std::string s = x;
std::cout<<s;
return 0;
}
輸出
asdfg
那麼我們的Val類到這裡就告一段落了。接下來寫parser類
首先,我們使用C++裡的stringstream來幫我們按順序逐個字元地讀取輸入字串
// JSON parser skeleton; the parsing member functions are added step by step below.
class parser
{
public:
private:
// Input buffer that the parse functions read from one character at a time.
std::stringstream ss;
};
我們整個的流程是parse函式
// Parses the JSON text in `json` and returns the value it describes.
// The stream buffer is replaced rather than appended to, so one parser can
// be reused for several documents even if an earlier parse stopped early.
Val parse(const std::string& json)
{
    ss.str(json);  // swap in the new text; clear() alone only resets the state flags
    ss.clear();    // drop any eof/fail bits left behind by a previous parse
    return parseVal();
}
然後寫一個parseVal用來詳細解析並返回Val型別
// Parses the next JSON value, dispatching on its first non-blank character:
// '"' string, '[' list, '{' dict, 't'/'f' bool, 'n' null, anything else is
// handed to the number parser.
// Throws std::runtime_error when the input ends before a value starts
// (this covers both empty and whitespace-only input).
Val parseVal()
{
    skipSpace();
    const int next = ss.peek();
    if (next == std::char_traits<char>::eof())
        throw std::runtime_error("Unexpected end of JSON input");

    switch (next)
    {
    case '"': return parseStr();
    case '[': return parseList();
    case '{': return parseDict();
    case 't':
    case 'f': return parseBool();
    case 'n': return parseNull();
    default:  return parseNumber();
    }
}
- 對於JSON文字,各個token之間的空格、換行等空白字元都需要忽略掉(字串內部的空白則必須保留)
// Advances the stream past any run of spaces, line feeds, tabs and carriage
// returns, stopping at the first other character or at end of input.
void skipSpace()
{
    for (;;)
    {
        switch (ss.peek())
        {
        case ' ':
        case '\n':
        case '\t':
        case '\r':
            ss.get();
            continue;  // keep looking at the next character
        default:
            return;
        }
    }
}
- 如果碰到"那麼接下來肯定是個字串
Val parseStr()
{
ss.get();//吃掉引號
char c = ss.peek();
std::string s;
while (ss.peek() != '"')s.push_back(ss.get());
ss.get();//吃掉引號
return Val(s);
}
- 如果碰到左方括號,接下來是list
// Parses a JSON array; the stream must be positioned on '['.
// Elements may be separated by any mix of commas and whitespace (the
// separator handling is deliberately lenient).
// Throws std::runtime_error when the input ends before the closing ']'.
Val parseList()
{
    ss.get(); // consume '['
    skipSpace();
    Val::List items;
    while (ss.peek() != ']')
    {
        // Without this check a truncated document would loop forever.
        if (ss.peek() == std::char_traits<char>::eof())
            throw std::runtime_error("Unterminated JSON array");

        items.push_back(parseVal());

        // Skip the separators that follow the element.
        int next = ss.peek();
        while (next == ',' || next == ' ' || next == '\n' || next == '\t' || next == '\r')
        {
            ss.get();
            next = ss.peek();
        }
    }
    ss.get(); // consume ']'
    return Val(items);
}
- 如果碰到左大括號,接下來肯定是dict
// Parses a JSON object; the stream must be positioned on '{'.
// Whitespace (including "\r\n" line endings) and commas between members are
// skipped before the closing brace is tested, so "{ }" and a '}' on its own
// line are handled. Throws std::runtime_error when the input ends before '}'
// or when a member does not start with a quoted key.
Val parseDict()
{
    ss.get(); // consume '{'
    Val::Dict members;
    for (;;)
    {
        // Skip separators in front of the next key or the closing brace.
        int next = ss.peek();
        while (next == ' ' || next == '\t' || next == '\n' || next == '\r' || next == ',')
        {
            ss.get();
            next = ss.peek();
        }
        if (next == '}')
            break;
        if (next == std::char_traits<char>::eof())
            throw std::runtime_error("Unterminated JSON object");
        if (next != '"')
            throw std::runtime_error("Expected a string key in JSON object");

        const std::string key = parseStr();

        // Skip the colon together with any whitespace around it.
        next = ss.peek();
        while (next == ':' || next == ' ' || next == '\t' || next == '\n' || next == '\r')
        {
            ss.get();
            next = ss.peek();
        }

        members[key] = parseVal();
    }
    ss.get(); // consume '}'
    return Val(members);
}
- 如果是t或f,那麼肯定是bool型別
// Parses the literal "true" or "false"; the stream must be positioned on its
// first letter. Every character is checked, so a misspelt or truncated
// literal throws std::runtime_error instead of being silently accepted.
Val parseBool()
{
    const bool isFalse = (ss.peek() == 'f');
    const std::string expected = isFalse ? "false" : "true";
    for (const char want : expected)
    {
        if (ss.get() != want)
            throw std::runtime_error("Invalid JSON literal, expected " + expected);
    }
    return Val(!isFalse);
}
- 如果是n開頭,那麼是null
// Parses the literal "null"; the stream must be positioned on the 'n'.
// Every character is checked, so a misspelt or truncated literal throws
// std::runtime_error instead of being silently accepted.
Val parseNull()
{
    const std::string expected = "null";
    for (const char want : expected)
    {
        if (ss.get() != want)
            throw std::runtime_error("Invalid JSON literal, expected " + expected);
    }
    return Val(nullptr);
}
- 剩下的就是數字了,或者是整型,或者是浮點型
// Parses a JSON number. Text containing '.', 'e' or 'E' becomes a double;
// everything else becomes an int, falling back to double when the value does
// not fit into an int.
// Throws std::invalid_argument (from std::stoi / std::stod) when no number
// can be read at the current position.
Val parseNumber()
{
    std::string digits;
    for (int next = ss.peek();
         std::isdigit(next) || next == 'e' || next == 'E' ||
         next == '-' || next == '+' || next == '.';
         next = ss.peek())
    {
        digits.push_back(static_cast<char>(ss.get()));
    }

    const bool isFloat = digits.find_first_of(".eE") != std::string::npos;
    if (!isFloat)
    {
        try
        {
            return Val(std::stoi(digits));
        }
        catch (const std::out_of_range&)
        {
            // Too large for int: represent it as a double below.
        }
    }
    return Val(std::stod(digits));
}
嘗試解析一個VS Code的json檔案
int main()
{
parser p;
std::string json = R"({
"version": "0.2.0",
"configurations": [
{
"name": "(Windows) 啟動",
"type": "cppvsdbg",
"request": "launch",
"program": "輸入程式名稱,例如 ${workspaceFolder}/a.exe",
"args": [],
"stopAtEntry": false,
"cwd": "${fileDirname}",
"environment": [],
"console": "externalTerminal"
}
]
})";
std::cout << p.parse(json);
return 0;
}
輸出結果
{"configurations": [{"args": [], "console": "externalTerminal", "cwd": "${fileDirname}", "environment": [], "name": "(Windows) 啟動", "program": "輸入程式名稱,例如 ${workspaceFolder}/a.exe", "request": "launch", "stopAtEntry": false, "type": "cppvsdbg"}], "version": "0.2.0"}
在一個txt裡配置一個更復雜的json,然後進行一些測試
效果
{"departments": [{"courses": [{"courseId": "CS101", "credits": 4, "students": [{"grade": "A", "id": 1, "name": "John Doe"}, {"grade": "B", "id": 2, "name": "Jane Smith"}], "title": "Introduction to Computer Science"}, {"courseId": "CS102", "credits": 3, "students": [{"grade": "A", "id": 3, "name": "Jim Brown"}, {"grade": "C", "id": 4, "name": "Jake White"}], "title": "Data Structures"}], "head": "Dr. Alice", "name": "Computer Science"}, {"courses": [{"courseId": "MATH101", "credits": 4, "students": [{"grade": "A", "id": 5, "name": "Alice Green"}, {"grade": "B", "id": 6, "name": "Bob Blue"}], "title": "Calculus I"}, {"courseId": "MATH102", "credits": 3, "students": [{"grade": "B", "id": 7, "name": "Charlie Black"}, {"grade": "A", "id": 8, "name": "Diana Yellow"}], "title": "Linear Algebra"}], "head": "Dr. Bob", "name": "Mathematics"}], "established": 1990, "events": [{"date": "2023-05-15", "name": "Science Fair", "participants": ["John Doe", "Jane Smith", "Jim Brown"]}, {"date": "2023-06-20", "name": "Math Olympiad", "participants": ["Alice Green", "Bob Blue", "Charlie Black"]}], "facilities": {"library": {"books": 50000, "name": "Central Library", "openHours": "8am - 8pm"}, "sportsComplex": {"name": "Sports Arena", "sports": ["Basketball", "Soccer", "Tennis"]}}, "isPublic": true, "location": {"city": "Example City", "country": "Example Country", "state": "Example State"}, "name": "Example School", "numbers": {"float": 3.14, "integer": 42, "negative_float": -3.14, "negative_integer": -42, "negative_scientific": -0.000123, "scientific": 12300}}
----------------
departments : [{"courses": [{"courseId": "CS101", "credits": 4, "students": [{"grade": "A", "id": 1, "name": "John Doe"}, {"grade": "B", "id": 2, "name": "Jane Smith"}], "title": "Introduction to Computer Science"}, {"courseId": "CS102", "credits": 3, "students": [{"grade": "A", "id": 3, "name": "Jim Brown"}, {"grade": "C", "id": 4, "name": "Jake White"}], "title": "Data Structures"}], "head": "Dr. Alice", "name": "Computer Science"}, {"courses": [{"courseId": "MATH101", "credits": 4, "students": [{"grade": "A", "id": 5, "name": "Alice Green"}, {"grade": "B", "id": 6, "name": "Bob Blue"}], "title": "Calculus I"}, {"courseId": "MATH102", "credits": 3, "students": [{"grade": "B", "id": 7, "name": "Charlie Black"}, {"grade": "A", "id": 8, "name": "Diana Yellow"}], "title": "Linear Algebra"}], "head": "Dr. Bob", "name": "Mathematics"}]
established : 1990
events : [{"date": "2023-05-15", "name": "Science Fair", "participants": ["John Doe", "Jane Smith", "Jim Brown"]}, {"date": "2023-06-20", "name": "Math Olympiad", "participants": ["Alice Green", "Bob Blue", "Charlie Black"]}]
facilities : {"library": {"books": 50000, "name": "Central Library", "openHours": "8am - 8pm"}, "sportsComplex": {"name": "Sports Arena", "sports": ["Basketball", "Soccer", "Tennis"]}}
isPublic : true
location : {"city": "Example City", "country": "Example Country", "state": "Example State"}
name : "Example School"
numbers : {"float": 3.14, "integer": 42, "negative_float": -3.14, "negative_integer": -42, "negative_scientific": -0.000123, "scientific": 12300}
"Example City"
[{"courseId": "MATH101", "credits": 4, "students": [{"grade": "A", "id": 5, "name": "Alice Green"}, {"grade": "B", "id": 6, "name": "Bob Blue"}], "title": "Calculus I"}, {"courseId": "MATH102", "credits": 3, "students": [{"grade": "B", "id": 7, "name": "Charlie Black"}, {"grade": "A", "id": 8, "name": "Diana Yellow"}], "title": "Linear Algebra"}, "C++"]
"def"
完整程式碼
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <fstream>
#include <initializer_list>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <variant>
#include <vector>
/// Dynamically typed JSON value.
///
/// A Val holds exactly one of: null, int, bool, double, string, list
/// (std::vector<Val>) or dict (std::map<std::string, Val>). It converts
/// implicitly from and to those C++ types; a conversion to a type that is not
/// the one currently stored throws std::bad_variant_access.
class Val
{
public:
    using List = std::vector<Val>;
    using Dict = std::map<std::string, Val>;
    using val = std::variant<
        std::nullptr_t,
        int,
        bool,
        double,
        std::string,
        List,
        Dict>;

    Val() : value_(nullptr) {}
    Val(std::nullptr_t) : value_(nullptr) {}
    Val(int value) : value_(value) {}
    Val(double value) : value_(value) {}
    Val(const std::string& value) : value_(value) {}
    // Dedicated overload so that string literals do not convert to bool.
    Val(const char* value) : value_(std::string(value)) {}
    Val(const List& value) : value_(value) {}
    Val(const Dict& value) : value_(value) {}
    Val(bool value) : value_(value) {}
    Val(std::initializer_list<Val> l) : value_(List(l)) {}

    /// Appends `v`, treating this value as a list.
    /// A list gets `v` pushed onto its end, a null value becomes a
    /// one-element list, and any other value becomes a two-element list made
    /// of the old value followed by `v`.
    void add(Val v)
    {
        if (List* items = std::get_if<List>(&value_))
        {
            items->push_back(std::move(v));
            return;
        }

        List items;
        if (!std::holds_alternative<std::nullptr_t>(value_))
        {
            items.reserve(2);
            // Move the old content into the new list; value_ is reassigned below.
            items.emplace_back(std::move(*this));
        }
        items.push_back(std::move(v));
        value_ = std::move(items);
    }

    /// Stores `value` under `key`, treating this value as a dict.
    /// An existing entry is overwritten; a null value becomes a one-entry dict.
    /// @throws std::runtime_error when this value is neither a dict nor null.
    void put(const std::string& key, Val value)
    {
        if (Dict* members = std::get_if<Dict>(&value_))
        {
            (*members)[key] = std::move(value);
        }
        else if (std::holds_alternative<std::nullptr_t>(value_))
        {
            Dict members;
            members.emplace(key, std::move(value));
            value_ = std::move(members);
        }
        else
            throw std::runtime_error("Not a dictionary");
    }

    /// Indexes into this value with a dynamically typed key.
    ///  - dict: `key` must hold a string; a missing entry is inserted as null.
    ///  - list: `key` must hold an int that is a valid position.
    ///  - null: this value first turns into a dict (string key) or into a
    ///          list padded with nulls up to the requested position (int key).
    /// @throws std::bad_variant_access when the key type does not fit the container.
    /// @throws std::out_of_range for an invalid list position.
    /// @throws std::runtime_error when this value is neither a container nor null.
    Val& operator[](const Val& key)
    {
        if (std::holds_alternative<Dict>(value_))
        {
            return std::get<Dict>(value_)[std::get<std::string>(key.value_)];
        }
        if (std::holds_alternative<List>(value_))
        {
            List& items = std::get<List>(value_);
            const int index = std::get<int>(key.value_);
            // vector::operator[] is unchecked, so reject bad positions here.
            if (index < 0 || static_cast<std::size_t>(index) >= items.size())
                throw std::out_of_range("List index out of range");
            return items[static_cast<std::size_t>(index)];
        }
        if (std::holds_alternative<std::nullptr_t>(value_))
        {
            if (std::holds_alternative<std::string>(key.value_))
            {
                const std::string& name = std::get<std::string>(key.value_);
                value_ = Dict();
                return std::get<Dict>(value_)[name];  // inserts name -> null
            }
            if (std::holds_alternative<int>(key.value_))
            {
                const int index = std::get<int>(key.value_);
                if (index < 0)
                    throw std::out_of_range("List index out of range");
                // Create a list just long enough, filled with nulls.
                value_ = List(static_cast<std::size_t>(index) + 1, Val());
                return std::get<List>(value_)[static_cast<std::size_t>(index)];
            }
        }
        throw std::runtime_error("Not a dict or a list");
    }

    /// Integer positions get their own overload: the literal 0 is also a null
    /// pointer constant, so without it `v[0]` would pick operator[](const char*).
    Val& operator[](int index)
    {
        return (*this)[Val(index)];
    }

    /// Looks `key` up in a dict, inserting a null entry when it is missing.
    /// A null value is first turned into a dict containing just that key.
    /// @throws std::runtime_error when this value is neither a dict nor null.
    Val& operator[](const char* key)
    {
        if (std::holds_alternative<Dict>(value_))
        {
            return std::get<Dict>(value_)[key];
        }
        if (std::holds_alternative<std::nullptr_t>(value_))
        {
            value_ = Dict({ {key, nullptr} });
            return std::get<Dict>(value_)[key];
        }
        throw std::runtime_error("Not a dictionary");
    }

    friend std::ostream& operator<<(std::ostream& out, const Val& v);

    // Conversion operators; each throws std::bad_variant_access when the
    // stored type differs from the requested one.
    operator std::nullptr_t() const { return std::get<std::nullptr_t>(value_); }
    operator bool() const { return std::get<bool>(value_); }
    operator int() const { return std::get<int>(value_); }
    operator double() const { return std::get<double>(value_); }
    operator std::string() const { return std::get<std::string>(value_); }
    operator List() const { return std::get<List>(value_); }
    operator Dict() const { return std::get<Dict>(value_); }

private:
    val value_;
};
std::ostream& operator<<(std::ostream& out, const class Val& v)
{
if (std::holds_alternative<std::nullptr_t>(v.value_))
out << "nullptr";
else if (std::holds_alternative<bool>(v.value_))
out << (std::get<bool>(v.value_) ? "true" : "false");
else if (std::holds_alternative<int>(v.value_))
out << std::get<int>(v.value_);
else if (std::holds_alternative<double>(v.value_))
out << std::get<double>(v.value_);
else if (std::holds_alternative<std::string>(v.value_))
out << "\"" << std::get<std::string>(v.value_) << "\"";
else if (std::holds_alternative<Val::List>(v.value_))
{
out << "[";
const auto& list = std::get<Val::List>(v.value_);
for (size_t i = 0; i < list.size(); ++i) {
if (i > 0) out << ", ";
out << list[i];
}
out << "]";
}
else if (std::holds_alternative<Val::Dict>(v.value_))
{
out << "{";
const auto& dict = std::get<Val::Dict>(v.value_);
for (auto it = dict.begin(); it != dict.end(); ++it)
{
if (it != dict.begin()) out << ", ";
out << "\"" << it->first << "\": " << it->second;
}
out << "}";
}
return out;
}
/// Small recursive-descent JSON parser that builds a Val tree.
///
/// Structural problems (truncated input, misspelt literals, missing keys)
/// are reported as std::runtime_error; unreadable numbers surface as
/// std::invalid_argument from the standard conversion functions.
/// Separators are handled leniently: commas and whitespace may be mixed
/// freely between elements.
class parser
{
public:
    /// Skips spaces, tabs, carriage returns and line feeds.
    void skipSpace()
    {
        while (isBlank(ss.peek()))
            ss.get();
    }

    /// Parses the JSON text in `json` and returns the value it describes.
    /// The buffer is replaced rather than appended to, so one parser can be
    /// reused for several documents even if an earlier parse stopped early.
    Val parse(const std::string& json)
    {
        ss.str(json);  // clear() alone would keep the old buffer content
        ss.clear();    // drop eof/fail bits left behind by a previous parse
        return parseVal();
    }

    /// Parses the next value, dispatching on its first non-blank character.
    /// @throws std::runtime_error when the input ends before a value starts.
    Val parseVal()
    {
        skipSpace();
        const int next = ss.peek();
        if (next == kEof)
            throw std::runtime_error("Unexpected end of JSON input");

        switch (next)
        {
        case '"': return parseStr();
        case '[': return parseList();
        case '{': return parseDict();
        case 't':
        case 'f': return parseBool();
        case 'n': return parseNull();
        default:  return parseNumber();
        }
    }

    /// Parses a string; the stream must be positioned on the opening quote.
    /// Standard escapes (\" \\ \/ \b \f \n \r \t \uXXXX) are decoded; an
    /// unknown escape is kept verbatim (backslash included).
    /// @throws std::runtime_error when the closing quote is missing.
    Val parseStr()
    {
        ss.get(); // consume the opening quote
        std::string text;
        for (;;)
        {
            const int next = ss.get();
            if (next == kEof)
                throw std::runtime_error("Unterminated JSON string");
            if (next == '"')
                break; // closing quote already consumed by get()
            if (next != '\\')
            {
                text.push_back(static_cast<char>(next));
                continue;
            }

            const int escaped = ss.get();
            switch (escaped)
            {
            case '"':
            case '\\':
            case '/': text.push_back(static_cast<char>(escaped)); break;
            case 'b': text.push_back('\b'); break;
            case 'f': text.push_back('\f'); break;
            case 'n': text.push_back('\n'); break;
            case 'r': text.push_back('\r'); break;
            case 't': text.push_back('\t'); break;
            case 'u': appendUtf8(text, readCodePoint()); break;
            default:
                if (escaped == kEof)
                    throw std::runtime_error("Unterminated JSON string");
                text.push_back('\\');
                text.push_back(static_cast<char>(escaped));
            }
        }
        return Val(text);
    }

    /// Parses an array; the stream must be positioned on '['.
    /// @throws std::runtime_error when the closing ']' is missing.
    Val parseList()
    {
        ss.get(); // consume '['
        skipSpace();
        Val::List items;
        while (ss.peek() != ']')
        {
            // Without this check a truncated document would loop forever.
            if (ss.peek() == kEof)
                throw std::runtime_error("Unterminated JSON array");
            items.push_back(parseVal());
            while (isBlank(ss.peek()) || ss.peek() == ',')
                ss.get();
        }
        ss.get(); // consume ']'
        return Val(items);
    }

    /// Parses an object; the stream must be positioned on '{'.
    /// Whitespace (including "\r\n") and commas are skipped before the
    /// closing brace is tested, so "{ }" and a '}' on its own line work.
    /// @throws std::runtime_error when '}' is missing or a member does not
    ///         start with a quoted key.
    Val parseDict()
    {
        ss.get(); // consume '{'
        Val::Dict members;
        for (;;)
        {
            while (isBlank(ss.peek()) || ss.peek() == ',')
                ss.get();

            const int next = ss.peek();
            if (next == '}')
                break;
            if (next == kEof)
                throw std::runtime_error("Unterminated JSON object");
            if (next != '"')
                throw std::runtime_error("Expected a string key in JSON object");

            const std::string key = parseStr();

            // Skip the colon together with any whitespace around it.
            while (isBlank(ss.peek()) || ss.peek() == ':')
                ss.get();

            members[key] = parseVal();
        }
        ss.get(); // consume '}'
        return Val(members);
    }

    /// Parses "true" or "false", validating every character.
    /// @throws std::runtime_error on a misspelt or truncated literal.
    Val parseBool()
    {
        if (ss.peek() == 'f')
        {
            expectLiteral("false");
            return Val(false);
        }
        expectLiteral("true");
        return Val(true);
    }

    /// Parses "null", validating every character.
    /// @throws std::runtime_error on a misspelt or truncated literal.
    Val parseNull()
    {
        expectLiteral("null");
        return Val(nullptr);
    }

    /// Parses a number. Text containing '.', 'e' or 'E' becomes a double;
    /// everything else becomes an int, falling back to double when the value
    /// does not fit into an int.
    /// @throws std::invalid_argument when no number can be read here.
    Val parseNumber()
    {
        std::string digits;
        for (int next = ss.peek();
             std::isdigit(next) || next == 'e' || next == 'E' ||
             next == '-' || next == '+' || next == '.';  // '.' is required for fractions
             next = ss.peek())
        {
            digits.push_back(static_cast<char>(ss.get()));
        }

        const bool isFloat = digits.find_first_of(".eE") != std::string::npos;
        if (!isFloat)
        {
            try
            {
                return Val(std::stoi(digits));
            }
            catch (const std::out_of_range&)
            {
                // Too large for int: represent it as a double below.
            }
        }
        return Val(std::stod(digits));  // stod, not stof, to keep double precision
    }

private:
    static constexpr int kEof = std::char_traits<char>::eof();

    /// True for the four whitespace characters JSON allows between tokens.
    static bool isBlank(int ch)
    {
        return ch == ' ' || ch == '\n' || ch == '\t' || ch == '\r';
    }

    /// Consumes `literal` from the stream, throwing when the input differs.
    void expectLiteral(const char* literal)
    {
        for (const char* want = literal; *want != '\0'; ++want)
        {
            if (ss.get() != *want)
                throw std::runtime_error(std::string("Invalid JSON literal, expected ") + literal);
        }
    }

    /// Reads the four hex digits that follow "\u" and returns their value.
    unsigned long readHex4()
    {
        unsigned long unit = 0;
        for (int i = 0; i < 4; ++i)
        {
            const int digit = ss.get();
            unit <<= 4;
            if (digit >= '0' && digit <= '9')      unit |= static_cast<unsigned long>(digit - '0');
            else if (digit >= 'a' && digit <= 'f') unit |= static_cast<unsigned long>(digit - 'a' + 10);
            else if (digit >= 'A' && digit <= 'F') unit |= static_cast<unsigned long>(digit - 'A' + 10);
            else throw std::runtime_error("Invalid \\u escape in JSON string");
        }
        return unit;
    }

    /// Decodes the code point of a "\uXXXX" escape whose "\u" was already
    /// consumed, combining a high/low surrogate pair into one code point.
    unsigned long readCodePoint()
    {
        const unsigned long unit = readHex4();
        if (unit >= 0xD800 && unit <= 0xDBFF)
        {
            if (ss.get() != '\\' || ss.get() != 'u')
                throw std::runtime_error("Unpaired surrogate in JSON string");
            const unsigned long low = readHex4();
            if (low < 0xDC00 || low > 0xDFFF)
                throw std::runtime_error("Unpaired surrogate in JSON string");
            return 0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00);
        }
        if (unit >= 0xDC00 && unit <= 0xDFFF)
            throw std::runtime_error("Unpaired surrogate in JSON string");
        return unit;
    }

    /// Appends the UTF-8 encoding of code point `cp` to `out`.
    static void appendUtf8(std::string& out, unsigned long cp)
    {
        if (cp < 0x80)
        {
            out.push_back(static_cast<char>(cp));
        }
        else if (cp < 0x800)
        {
            out.push_back(static_cast<char>(0xC0 | (cp >> 6)));
            out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
        }
        else if (cp < 0x10000)
        {
            out.push_back(static_cast<char>(0xE0 | (cp >> 12)));
            out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
        }
        else
        {
            out.push_back(static_cast<char>(0xF0 | (cp >> 18)));
            out.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
            out.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
        }
    }

    std::stringstream ss;  // input buffer consumed one character at a time
};
int main()
{
std::ifstream fin("test.txt");
std::stringstream ss;
ss << fin.rdbuf();
parser p;
Val x = p.parse(ss.str());
std::cout << x << std::endl;
std::cout << "----------------\n";
for (auto i : Val::Dict(x))
{
std::cout << i.first << " : " << i.second << std::endl;
}
std::cout << x[std::string("location")][std::string("city")] << "\n";
x[std::string("departments")][1][std::string("courses")].add(Val("C++"));
std::cout << x[std::string("departments")][1][std::string("courses")] << "\n";
x.put("abc", "def");
std::cout << x["abc"];
return 0;
}
test檔案
test.txt
{
"name": "Example School",
"location": {
"city": "Example City",
"state": "Example State",
"country": "Example Country"
},
"established": 1990,
"isPublic": true,
"departments": [
{
"name": "Computer Science",
"head": "Dr. Alice",
"courses": [
{
"courseId": "CS101",
"title": "Introduction to Computer Science",
"credits": 4,
"students": [
{"id": 1, "name": "John Doe", "grade": "A"},
{"id": 2, "name": "Jane Smith", "grade": "B"}
]
},
{
"courseId": "CS102",
"title": "Data Structures",
"credits": 3,
"students": [
{"id": 3, "name": "Jim Brown", "grade": "A"},
{"id": 4, "name": "Jake White", "grade": "C"}
]
}
]
},
{
"name": "Mathematics",
"head": "Dr. Bob",
"courses": [
{
"courseId": "MATH101",
"title": "Calculus I",
"credits": 4,
"students": [
{"id": 5, "name": "Alice Green", "grade": "A"},
{"id": 6, "name": "Bob Blue", "grade": "B"}
]
},
{
"courseId": "MATH102",
"title": "Linear Algebra",
"credits": 3,
"students": [
{"id": 7, "name": "Charlie Black", "grade": "B"},
{"id": 8, "name": "Diana Yellow", "grade": "A"}
]
}
]
}
],
"facilities": {
"library": {
"name": "Central Library",
"books": 50000,
"openHours": "8am - 8pm"
},
"sportsComplex": {
"name": "Sports Arena",
"sports": ["Basketball", "Soccer", "Tennis"]
}
},
"events": [
{
"name": "Science Fair",
"date": "2023-05-15",
"participants": ["John Doe", "Jane Smith", "Jim Brown"]
},
{
"name": "Math Olympiad",
"date": "2023-06-20",
"participants": ["Alice Green", "Bob Blue", "Charlie Black"]
}
],
"numbers": {
"integer": 42,
"negative_integer": -42,
"float": 3.14,
"negative_float": -3.14,
"scientific": 1.23e4,
"negative_scientific": -1.23e-4
}
}