看了 the-super-tiny-compiler 的理论部分后,自己用 JavaScript 实现了一个简单的编译器,将 LISP 风格的函数调用转换成 C 风格的函数调用。它只由最简单的三部分组成:tokenizer、parser 和代码生成器(generate),以下为源码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/**
 * Splits a LISP-style source string into a flat list of tokens.
 *
 * Example input: (add 2 (subtract 4 2))
 * Example output:
 * [
 *   { type: 'paren',  value: '(' },
 *   { type: 'name',   value: 'add' },
 *   { type: 'number', value: '2' },
 *   { type: 'paren',  value: '(' },
 *   { type: 'name',   value: 'subtract' },
 *   { type: 'number', value: '4' },
 *   { type: 'number', value: '2' },
 *   { type: 'paren',  value: ')' },
 *   { type: 'paren',  value: ')' },
 * ]
 *
 * @param {string} input - Source text to tokenize.
 * @returns {Array<{type: string, value: string}>} Tokens in source order.
 * @throws {TypeError} If a character is neither whitespace, a parenthesis,
 *   an ASCII letter nor a digit.
 */
// Character classes, in order: parenthesis, name (letters), number (digits).
const Regexes = [/[()]/, /[a-z]/i, /[0-9]/];
const tokenizer = input => {
  const [parenRegex, nameRegex, numberRegex] = Regexes;
  const whitespaceRegex = /\s/;
  const tokens = [];
  let i = 0;
  while (i < input.length) {
    const char = input[i];

    // Whitespace only separates tokens; it never produces one. Skipping it
    // here (instead of filtering afterwards) also handles runs of several
    // spaces, tabs and newlines.
    if (whitespaceRegex.test(char)) {
      i++;
      continue;
    }

    // Every parenthesis is its own single-character token.
    if (parenRegex.test(char)) {
      tokens.push({
        type: "paren",
        value: char
      });
      i++;
      continue;
    }

    let runRegex = null;
    if (nameRegex.test(char)) {
      runRegex = nameRegex;
    } else if (numberRegex.test(char)) {
      runRegex = numberRegex;
    }
    if (runRegex === null) {
      // Fail loudly rather than emitting a bogus token for unknown input.
      throw new TypeError(`Unexpected character: ${char}`);
    }

    // Extend the run up to the first character of a different class.
    let j = i + 1;
    while (j < input.length && runRegex.test(input[j])) {
      j++;
    }
    tokens.push({
      type: runRegex === nameRegex ? "name" : "number",
      value: input.slice(i, j)
    });
    i = j;
  }
  return tokens;
};

/**
 * Builds an abstract syntax tree from the token list produced by the
 * tokenizer.
 *
 * Example input (tokens of "(add 2 (subtract 4 2))"):
 * [
 *   { type: 'paren',  value: '(' },
 *   { type: 'name',   value: 'add' },
 *   { type: 'number', value: '2' },
 *   { type: 'paren',  value: '(' },
 *   { type: 'name',   value: 'subtract' },
 *   { type: 'number', value: '4' },
 *   { type: 'number', value: '2' },
 *   { type: 'paren',  value: ')' },
 *   { type: 'paren',  value: ')' },
 * ]
 * Example output:
 * {
 *   type: 'Program',
 *   body: [{
 *     type: 'CallExpression',
 *     name: 'add',
 *     params: [{
 *       type: 'NumberLiteral',
 *       value: '2',
 *     }, {
 *       type: 'CallExpression',
 *       name: 'subtract',
 *       params: [{
 *         type: 'NumberLiteral',
 *         value: '4',
 *       }, {
 *         type: 'NumberLiteral',
 *         value: '2',
 *       }]
 *     }]
 *   }]
 * }
 *
 * Call expressions may take any number of params, and any param may itself
 * be a nested call expression. Every top-level expression in the token list
 * is appended to `body`.
 *
 * @param {Array<{type: string, value: string}>} tokens - Tokens to parse.
 * @returns {{type: string, body: Array<object>}} The Program root node.
 * @throws {SyntaxError} If the tokens are unbalanced, a "(" is not followed
 *   by a name, or a token appears where an expression is expected.
 */
const parser = tokens => {
  // Position of the next unread token; shared by all recursive walk() calls
  // so that a nested expression consumes exactly the tokens it spans.
  let cursor = 0;

  const walk = () => {
    const token = tokens[cursor];
    if (token === undefined) {
      throw new SyntaxError("Unexpected end of input");
    }

    if (token.type === "number") {
      cursor++;
      return {
        type: "NumberLiteral",
        value: token.value
      };
    }

    if (token.value === "(") {
      const nameToken = tokens[cursor + 1];
      if (nameToken === undefined || nameToken.type !== "name") {
        throw new SyntaxError('Expected a function name after "("');
      }
      cursor += 2;

      const node = {
        type: "CallExpression",
        name: nameToken.value,
        params: []
      };
      // Collect params until the matching ")" is reached.
      for (;;) {
        const next = tokens[cursor];
        if (next === undefined) {
          throw new SyntaxError(`Missing ")" to close call to ${node.name}`);
        }
        if (next.value === ")") {
          cursor++;
          break;
        }
        node.params.push(walk());
      }
      return node;
    }

    throw new SyntaxError(`Unexpected token: ${token.value}`);
  };

  const ast = {
    type: "Program",
    body: []
  };
  while (cursor < tokens.length) {
    ast.body.push(walk());
  }
  return ast;
};

/**
 * Renders an AST produced by the parser as C-style call syntax.
 *
 * Example input:
 * {
 *   type: 'Program',
 *   body: [{
 *     type: 'CallExpression',
 *     name: 'add',
 *     params: [{
 *       type: 'NumberLiteral',
 *       value: '2',
 *     }, {
 *       type: 'CallExpression',
 *       name: 'subtract',
 *       params: [{
 *         type: 'NumberLiteral',
 *         value: '4',
 *       }, {
 *         type: 'NumberLiteral',
 *         value: '2',
 *       }]
 *     }]
 *   }]
 * }
 *
 * Example output:
 * add(2,subtract(4,2))
 *
 * Call expressions may carry any number of params. When the program body
 * holds several top-level expressions, each is rendered on its own line.
 *
 * @param {{type: string, body: Array<object>}} ast - The Program root node.
 * @returns {string} The generated source text.
 * @throws {TypeError} If a node has a type other than CallExpression or
 *   NumberLiteral.
 */
const generate = ast => {
  const walk = node => {
    switch (node.type) {
      case "CallExpression": {
        // Wrap walk so map's extra (index, array) arguments are not passed on.
        const args = node.params.map(param => walk(param)).join(",");
        return `${node.name}(${args})`;
      }
      case "NumberLiteral":
        return node.value;
      default:
        // Fail loudly instead of emitting the text "undefined" into output.
        throw new TypeError(`Unknown node type: ${node.type}`);
    }
  };
  return ast.body.map(node => walk(node)).join("\n");
};
// Demo: push one sample expression through all three stages and print the
// generated C-style call.
const sampleInput = "(add 2 (subtract 4 2))";
const tk = tokenizer(sampleInput); // stage 1: source text -> tokens
const ps = parser(tk); // stage 2: tokens -> AST
const fc = generate(ps); // stage 3: AST -> output text
console.log(fc);