Kaleidoscope 语言: Control Flow

在本章节中,我们为 Kaleidoscope 语言添加了控制流语句。

增加控制流语句 #

首先,我们增加了 控制流 关键字,它的格式如下

// control
tok_if = -6,
tok_then = -7,
tok_else = -8,

IF 的 AST 如下

/// IfExprAST - Expression class for if/then/else.
class IfExprAST : public ExprAST {
  std::unique_ptr<ExprAST> Cond, Then, Else;

public:
  IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
            std::unique_ptr<ExprAST> Else)
    : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}

  Value *codegen() override;
};

然后我们进行解析

/// ifexpr ::= 'if' expression 'then' expression 'else' expression
static std::unique_ptr<ExprAST> ParseIfExpr() {
  getNextToken();  // 吃掉 if token

  // condition. 解析出来的就是 Cond 表达式
  auto Cond = ParseExpression();
  if (!Cond)
    return nullptr;

  if (CurTok != tok_then)  // 期望 'then' 不是就报错了
    return LogError("expected then");
  getNextToken();  // 吃掉 then

  auto Then = ParseExpression();
  if (!Then)
    return nullptr;

  if (CurTok != tok_else)
    return LogError("expected else");

  getNextToken();

  auto Else = ParseExpression();
  if (!Else)
    return nullptr;

  return std::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
                                      std::move(Else));
}

对于 ParsePrimary 就变成如下

static std::unique_ptr<ExprAST> ParsePrimary() {
  switch (CurTok) {
  default:
    return LogError("unknown token when expecting an expression");
  case tok_identifier:
    return ParseIdentifierExpr();
  case tok_number:
    return ParseNumberExpr();
  case '(':
    return ParseParenExpr();
  case tok_if:
    return ParseIfExpr();
  }
}

我们期望的生成的 LR 如下


extern foo();
extern bar();
def baz(x) if x then foo() else bar();

---

define double @baz(double %x) {
entry:
  %ifcond = fcmp one double %x, 0.000000e+00
  br i1 %ifcond, label %then, label %else

then:       ; preds = %entry
  %calltmp = call double @foo()
  br label %ifcont

else:       ; preds = %entry
  %calltmp1 = call double @bar()
  br label %ifcont

ifcont:     ; preds = %else, %then
  %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
  ret double %iftmp
}

因此 codegen 如下

llvm::Value *IfExprAST::codegen () {
    llvm::Value *CondV = Cond->codegen ();
    if (!CondV)
        return nullptr;

    // 创建一个 ifcond 和 常量 0 对比,大于 0 为真,小于 0 为假
    // 等后面 Merge Then 和 Else 的时候用
    CondV = Builder->CreateFCmpONE (
    CondV, llvm::ConstantFP::get (*TheContext, llvm::APFloat (0.0)), "ifcond");

    llvm::Function *TheFunction = Builder->GetInsertBlock ()->getParent ();

    // 创建 Then 和 Else 的基本块
    llvm::BasicBlock *ThenBB = llvm::BasicBlock::Create (*TheContext, "then", TheFunction);
    llvm::BasicBlock *ElseBB = llvm::BasicBlock::Create (*TheContext, "else");
    llvm::BasicBlock *MergeBB = llvm::BasicBlock::Create (*TheContext, "ifcont");

    // 创建 Cond LR
    // etc:  br i1 %ifcond, label %then, label %else
    Builder->CreateCondBr (CondV, ThenBB, ElseBB);

    // 填充 Then 部分
    Builder->SetInsertPoint (ThenBB);

    llvm::Value *ThenV = Then->codegen ();
    if (!ThenV)
        return nullptr;
    // 创建 BR 跳转, etc
    // br label %ifcont
    Builder->CreateBr (MergeBB);
   
    ThenBB = Builder->GetInsertBlock ();

    // 和 Then 一样的逻辑,再处理一遍
    TheFunction->getBasicBlockList ().push_back (ElseBB);
    Builder->SetInsertPoint (ElseBB);

    llvm::Value *ElseV = Else->codegen ();
    if (!ElseV)
        return nullptr;
    // 创建 BR 跳转, etc
    // br label %ifcont
    Builder->CreateBr (MergeBB);
    // codegen of 'Else' can change the current block, update ElseBB for the PHI.
    ElseBB = Builder->GetInsertBlock ();
    // Emit merge block.
    TheFunction->getBasicBlockList ().push_back (MergeBB);

    // 创建 Merge 入口
    Builder->SetInsertPoint (MergeBB);
    //  %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
    llvm::PHINode *PN =
    Builder->CreatePHI (llvm::Type::getDoubleTy (*TheContext), 2, "iftmp");

    PN->addIncoming (ThenV, ThenBB);
    PN->addIncoming (ElseV, ElseBB);
    return PN;
}

这里使用了一个PHI 机制,还没有使用可变变量,等后面到可变变量的时候就会改写下。

For #

For 就比较雷同,我们直接看解析器部分

/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
static std::unique_ptr<ExprAST> ParseForExpr () {
    getNextToken (); // 吞掉 the for.

    if (CurTok != tok_identifier)
        return LogError ("expected identifier after for");
    // 获取变量定义部分
    std::string IdName = IdentifierStr;
    getNextToken (); // 吞掉 identifier.

    if (CurTok != '=')
        return LogError ("expected '=' after for");
    getNextToken (); // 吞掉 '='.

    // 解析开始语句,就是初始化变量
    auto Start = ParseExpression ();
    if (!Start)
        return nullptr;
    if (CurTok != ',')
        return LogError ("expected ',' after for start value");
    getNextToken (); // 吞掉 ','.

    // 解析结束语句
    auto End = ParseExpression ();
    if (!End)
        return nullptr;

    // 读取步长.
    std::unique_ptr<ExprAST> Step;
    if (CurTok == ',') {
        getNextToken ();
        Step = ParseExpression ();
        if (!Step)
            return nullptr;
    }

    if (CurTok != tok_in)
        return LogError ("expected 'in' after for");
    getNextToken (); // eat 'in'.

    auto Body = ParseExpression ();
    if (!Body)
        return nullptr;

    return std::make_unique<ForExprAST> (IdName, std::move (Start),
    std::move (End), std::move (Step), std::move (Body));
}

对于 codegen 也就想当熟悉对于如下

extern putchard(char);
def printstar(n)
  for i = 1, i < n, 1.0 in
    putchard(42);  # ascii 42 = '*'

# print 100 '*' characters
printstar(100);

我们期望生成的如下的 LR

declare double @putchard(double)

define double @printstar(double %n) {
entry:
  ; initial value = 1.0 (inlined into phi)
  br label %loop

loop:       ; preds = %loop, %entry
  %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
  ; body
  %calltmp = call double @putchard(double 4.200000e+01)
  ; increment
  %nextvar = fadd double %i, 1.000000e+00

  ; termination test
  %cmptmp = fcmp ult double %i, %n
  %booltmp = uitofp i1 %cmptmp to double
  %loopcond = fcmp one double %booltmp, 0.000000e+00
  br i1 %loopcond, label %loop, label %afterloop

afterloop:      ; preds = %loop
  ; loop always returns 0.0
  ret double 0.000000e+00
}
llvm::Value *ForExprAST::codegen () {
    //  先生成 Start 部分的初始化值
    llvm::Value *StartVal = Start->codegen ();
    if (!StartVal)
        return nullptr;
    // 生成 后面要用的各个基础块
    llvm::Function *TheFunction   = Builder->GetInsertBlock ()->getParent ();
    llvm::BasicBlock *PreheaderBB = Builder->GetInsertBlock ();
    llvm::BasicBlock *LoopBB = llvm::BasicBlock::Create (*TheContext, "loop", TheFunction);

    // etc: br label %loop
    Builder->CreateBr (LoopBB);

    Builder->SetInsertPoint (LoopBB);

    // Start the PHI node with an entry for Start.
    // 创建 PHI 节点,用来保存当前变量的值
    llvm::PHINode *Variable =
    Builder->CreatePHI (llvm::Type::getDoubleTy (*TheContext), 2, VarName.c_str ());

    // 来源于 PreheaderBB 的时候时候就赋值为 StartVal
    Variable->addIncoming (StartVal, PreheaderBB);
    // 在 NamedValues 保存这个值,可以后面更新
    llvm::Value *OldVal  = NamedValues[VarName];
    NamedValues[VarName] = Variable;

    // 生产步长增加逻辑
    if (!Body->codegen ())
        return nullptr;
    // Emit the step value.
    llvm::Value *StepVal = nullptr;
    if (Step) {
        StepVal = Step->codegen ();
        if (!StepVal)
            return nullptr;
    } else {
        // 默认步长 1.0
        StepVal = llvm::ConstantFP::get (*TheContext, llvm::APFloat (1.0));
    }

    llvm::Value *NextVar = Builder->CreateFAdd (Variable, StepVal, "nextvar");
    // Update the variable in the for loop value
    llvm::Value *EndCond = End->codegen ();
    if (!EndCond)
        return nullptr;

    // 循环迭代这样的逻辑
    EndCond = Builder->CreateFCmpONE (EndCond,
    llvm::ConstantFP::get (*TheContext, llvm::APFloat (0.0)), "loopcond");
    // Create the "after loop" block and insert it.
    llvm::BasicBlock *LoopEndBB = Builder->GetInsertBlock ();
    llvm::BasicBlock *AfterBB =
    llvm::BasicBlock::Create (*TheContext, "afterloop", TheFunction);

    // etc: br i1 %loopcond, label %loop, label %afterloop
    Builder->CreateCondBr (EndCond, LoopBB, AfterBB);

    // Any new code will be inserted in AfterBB.
    Builder->SetInsertPoint (AfterBB);
    // 如果来源于 loop 就是 NextVar
    Variable->addIncoming (NextVar, LoopEndBB);

    // Restore the unshadowed variable.
    if (OldVal)
        NamedValues[VarName] = OldVal;
    else
        NamedValues.erase (VarName);

    // for 永远返回一个 null 值
    return llvm::Constant::getNullValue (llvm::Type::getDoubleTy (*TheContext));
}
comments powered by Disqus