在本章节中,我们为 Kaleidoscope 语言添加了控制流语句。
增加控制流语句 #
首先,我们增加了 控制流
关键字,它的格式如下
// control
tok_if = -6,
tok_then = -7,
tok_else = -8,
IF 的 AST 如下
/// IfExprAST - Expression class for if/then/else.
class IfExprAST : public ExprAST {
std::unique_ptr<ExprAST> Cond, Then, Else;
public:
IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
std::unique_ptr<ExprAST> Else)
: Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
Value *codegen() override;
};
然后我们进行解析
/// ifexpr ::= 'if' expression 'then' expression 'else' expression
static std::unique_ptr<ExprAST> ParseIfExpr() {
getNextToken(); // 吃掉 if token
// condition. 解析出来的就是 Cond 表达式
auto Cond = ParseExpression();
if (!Cond)
return nullptr;
if (CurTok != tok_then) // 期望 'then' 不是就报错了
return LogError("expected then");
getNextToken(); // 吃掉 then
auto Then = ParseExpression();
if (!Then)
return nullptr;
if (CurTok != tok_else)
return LogError("expected else");
getNextToken();
auto Else = ParseExpression();
if (!Else)
return nullptr;
return std::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
std::move(Else));
}
对于 ParsePrimary
就变成如下
static std::unique_ptr<ExprAST> ParsePrimary() {
switch (CurTok) {
default:
return LogError("unknown token when expecting an expression");
case tok_identifier:
return ParseIdentifierExpr();
case tok_number:
return ParseNumberExpr();
case '(':
return ParseParenExpr();
case tok_if:
return ParseIfExpr();
}
}
我们期望的生成的 LR 如下
extern foo();
extern bar();
def baz(x) if x then foo() else bar();
---
define double @baz(double %x) {
entry:
%ifcond = fcmp one double %x, 0.000000e+00
br i1 %ifcond, label %then, label %else
then: ; preds = %entry
%calltmp = call double @foo()
br label %ifcont
else: ; preds = %entry
%calltmp1 = call double @bar()
br label %ifcont
ifcont: ; preds = %else, %then
%iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
ret double %iftmp
}
因此 codegen
如下
llvm::Value *IfExprAST::codegen () {
llvm::Value *CondV = Cond->codegen ();
if (!CondV)
return nullptr;
// 创建一个 ifcond 和 常量 0 对比,大于 0 为真,小于 0 为假
// 等后面 Merge Then 和 Else 的时候用
CondV = Builder->CreateFCmpONE (
CondV, llvm::ConstantFP::get (*TheContext, llvm::APFloat (0.0)), "ifcond");
llvm::Function *TheFunction = Builder->GetInsertBlock ()->getParent ();
// 创建 Then 和 Else 的基本块
llvm::BasicBlock *ThenBB = llvm::BasicBlock::Create (*TheContext, "then", TheFunction);
llvm::BasicBlock *ElseBB = llvm::BasicBlock::Create (*TheContext, "else");
llvm::BasicBlock *MergeBB = llvm::BasicBlock::Create (*TheContext, "ifcont");
// 创建 Cond LR
// etc: br i1 %ifcond, label %then, label %else
Builder->CreateCondBr (CondV, ThenBB, ElseBB);
// 填充 Then 部分
Builder->SetInsertPoint (ThenBB);
llvm::Value *ThenV = Then->codegen ();
if (!ThenV)
return nullptr;
// 创建 BR 跳转, etc
// br label %ifcont
Builder->CreateBr (MergeBB);
ThenBB = Builder->GetInsertBlock ();
// 和 Then 一样的逻辑,再处理一遍
TheFunction->getBasicBlockList ().push_back (ElseBB);
Builder->SetInsertPoint (ElseBB);
llvm::Value *ElseV = Else->codegen ();
if (!ElseV)
return nullptr;
// 创建 BR 跳转, etc
// br label %ifcont
Builder->CreateBr (MergeBB);
// codegen of 'Else' can change the current block, update ElseBB for the PHI.
ElseBB = Builder->GetInsertBlock ();
// Emit merge block.
TheFunction->getBasicBlockList ().push_back (MergeBB);
// 创建 Merge 入口
Builder->SetInsertPoint (MergeBB);
// %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
llvm::PHINode *PN =
Builder->CreatePHI (llvm::Type::getDoubleTy (*TheContext), 2, "iftmp");
PN->addIncoming (ThenV, ThenBB);
PN->addIncoming (ElseV, ElseBB);
return PN;
}
这里使用了一个PHI
机制,还没有使用可变变量,等后面到可变变量的时候就会改写下。
For #
For 就比较雷同,我们直接看解析器部分
/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
static std::unique_ptr<ExprAST> ParseForExpr () {
getNextToken (); // 吞掉 the for.
if (CurTok != tok_identifier)
return LogError ("expected identifier after for");
// 获取变量定义部分
std::string IdName = IdentifierStr;
getNextToken (); // 吞掉 identifier.
if (CurTok != '=')
return LogError ("expected '=' after for");
getNextToken (); // 吞掉 '='.
// 解析开始语句,就是初始化变量
auto Start = ParseExpression ();
if (!Start)
return nullptr;
if (CurTok != ',')
return LogError ("expected ',' after for start value");
getNextToken (); // 吞掉 ','.
// 解析结束语句
auto End = ParseExpression ();
if (!End)
return nullptr;
// 读取步长.
std::unique_ptr<ExprAST> Step;
if (CurTok == ',') {
getNextToken ();
Step = ParseExpression ();
if (!Step)
return nullptr;
}
if (CurTok != tok_in)
return LogError ("expected 'in' after for");
getNextToken (); // eat 'in'.
auto Body = ParseExpression ();
if (!Body)
return nullptr;
return std::make_unique<ForExprAST> (IdName, std::move (Start),
std::move (End), std::move (Step), std::move (Body));
}
对于 codegen
也就想当熟悉对于如下
extern putchard(char);
def printstar(n)
for i = 1, i < n, 1.0 in
putchard(42); # ascii 42 = '*'
# print 100 '*' characters
printstar(100);
我们期望生成的如下的 LR
declare double @putchard(double)
define double @printstar(double %n) {
entry:
; initial value = 1.0 (inlined into phi)
br label %loop
loop: ; preds = %loop, %entry
%i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
; body
%calltmp = call double @putchard(double 4.200000e+01)
; increment
%nextvar = fadd double %i, 1.000000e+00
; termination test
%cmptmp = fcmp ult double %i, %n
%booltmp = uitofp i1 %cmptmp to double
%loopcond = fcmp one double %booltmp, 0.000000e+00
br i1 %loopcond, label %loop, label %afterloop
afterloop: ; preds = %loop
; loop always returns 0.0
ret double 0.000000e+00
}
llvm::Value *ForExprAST::codegen () {
// 先生成 Start 部分的初始化值
llvm::Value *StartVal = Start->codegen ();
if (!StartVal)
return nullptr;
// 生成 后面要用的各个基础块
llvm::Function *TheFunction = Builder->GetInsertBlock ()->getParent ();
llvm::BasicBlock *PreheaderBB = Builder->GetInsertBlock ();
llvm::BasicBlock *LoopBB = llvm::BasicBlock::Create (*TheContext, "loop", TheFunction);
// etc: br label %loop
Builder->CreateBr (LoopBB);
Builder->SetInsertPoint (LoopBB);
// Start the PHI node with an entry for Start.
// 创建 PHI 节点,用来保存当前变量的值
llvm::PHINode *Variable =
Builder->CreatePHI (llvm::Type::getDoubleTy (*TheContext), 2, VarName.c_str ());
// 来源于 PreheaderBB 的时候时候就赋值为 StartVal
Variable->addIncoming (StartVal, PreheaderBB);
// 在 NamedValues 保存这个值,可以后面更新
llvm::Value *OldVal = NamedValues[VarName];
NamedValues[VarName] = Variable;
// 生产步长增加逻辑
if (!Body->codegen ())
return nullptr;
// Emit the step value.
llvm::Value *StepVal = nullptr;
if (Step) {
StepVal = Step->codegen ();
if (!StepVal)
return nullptr;
} else {
// 默认步长 1.0
StepVal = llvm::ConstantFP::get (*TheContext, llvm::APFloat (1.0));
}
llvm::Value *NextVar = Builder->CreateFAdd (Variable, StepVal, "nextvar");
// Update the variable in the for loop value
llvm::Value *EndCond = End->codegen ();
if (!EndCond)
return nullptr;
// 循环迭代这样的逻辑
EndCond = Builder->CreateFCmpONE (EndCond,
llvm::ConstantFP::get (*TheContext, llvm::APFloat (0.0)), "loopcond");
// Create the "after loop" block and insert it.
llvm::BasicBlock *LoopEndBB = Builder->GetInsertBlock ();
llvm::BasicBlock *AfterBB =
llvm::BasicBlock::Create (*TheContext, "afterloop", TheFunction);
// etc: br i1 %loopcond, label %loop, label %afterloop
Builder->CreateCondBr (EndCond, LoopBB, AfterBB);
// Any new code will be inserted in AfterBB.
Builder->SetInsertPoint (AfterBB);
// 如果来源于 loop 就是 NextVar
Variable->addIncoming (NextVar, LoopEndBB);
// Restore the unshadowed variable.
if (OldVal)
NamedValues[VarName] = OldVal;
else
NamedValues.erase (VarName);
// for 永远返回一个 null 值
return llvm::Constant::getNullValue (llvm::Type::getDoubleTy (*TheContext));
}