强曰为道

与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

第 13 章:自定义 Pass 开发

第 13 章:自定义 Pass 开发

“编写一个 Pass 是理解 LLVM 的最好方式。”


13.1 自定义 Pass 开发概述

13.1.1 开发流程

1. 确定 Pass 目标(分析或转换)
2. 确定作用范围(Module / Function / Loop)
3. 编写 Pass 代码
4. 注册为插件或编入 LLVM
5. 使用 opt 工具测试
6. 添加到优化流水线

13.1.2 两种部署方式

方式 | 说明 | 适用场景
插件(Plugin) | 编译为 .so,运行时加载 | 快速迭代、实验
内置(In-tree) | 编入 LLVM 源码树 | 生产、长期维护

13.2 Hello Pass(插件方式)

13.2.1 分析 Pass — 统计指令数量

// CountInstructionsPass.h
#ifndef LLVM_COUNT_INSTRUCTIONS_PASS_H
#define LLVM_COUNT_INSTRUCTIONS_PASS_H

#include "llvm/IR/PassManager.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {

/// Per-function instruction tally returned by CountInstructionsPass::run.
///
/// Every counter defaults to zero, so a default-constructed CountResult is
/// well-defined. Aggregate initialization such as `{0, 0, 0, 0, 0}` keeps
/// working because default member initializers do not disable it.
struct CountResult {
    unsigned Total = 0;      ///< Every instruction visited.
    unsigned Arithmetic = 0; ///< Integer and floating-point arithmetic opcodes.
    unsigned Memory = 0;     ///< load / store / alloca / getelementptr.
    unsigned Control = 0;    ///< br / switch / ret / invoke / call.
    unsigned Other = 0;      ///< Anything not counted in the groups above.
};

/// Function-level analysis whose run() yields a CountResult for one function.
class CountInstructionsPass : public AnalysisInfoMixin<CountInstructionsPass> {
    // Grants the mixin access to the private Key declared below.
    friend AnalysisInfoMixin<CountInstructionsPass>;
    // Defined out of line in CountInstructionsPass.cpp.
    static AnalysisKey Key;

public:
    /// Result type of this analysis.
    using Result = CountResult;
    /// Computes the instruction tally for \p F.
    Result run(Function &F, FunctionAnalysisManager &AM);
};

} // namespace llvm

#endif
// CountInstructionsPass.cpp
#include "CountInstructionsPass.h"

namespace llvm {

AnalysisKey CountInstructionsPass::Key;

/// Tallies every instruction of \p F by category and prints the summary to
/// outs().
///
/// Categories:
///  - Arithmetic: integer and floating-point add/sub/mul/div/rem.
///  - Memory:     load, store, alloca, getelementptr.
///  - Control:    br, switch, ret, invoke, call.
///  - Other:      every remaining opcode.
///
/// \param F function whose instructions are counted.
/// \returns the per-category counts; Total is the sum of all categories.
CountResult CountInstructionsPass::run(Function &F,
                                        FunctionAnalysisManager & /*AM*/) {
    // Named `Counts` rather than `Result` so the local does not hide the
    // class-scope alias CountInstructionsPass::Result.
    CountResult Counts = {0, 0, 0, 0, 0};

    for (const BasicBlock &BB : F) {
        for (const Instruction &I : BB) {
            ++Counts.Total;
            switch (I.getOpcode()) {
            case Instruction::Add:
            case Instruction::Sub:
            case Instruction::Mul:
            case Instruction::SDiv:
            case Instruction::UDiv:
            case Instruction::SRem:
            case Instruction::URem:
            case Instruction::FAdd:
            case Instruction::FSub:
            case Instruction::FMul:
            case Instruction::FDiv:
            case Instruction::FRem: // was missing although SRem/URem are counted
                ++Counts.Arithmetic;
                break;
            case Instruction::Load:
            case Instruction::Store:
            case Instruction::Alloca:
            case Instruction::GetElementPtr:
                ++Counts.Memory;
                break;
            case Instruction::Br:
            case Instruction::Switch:
            case Instruction::Ret:
            case Instruction::Invoke:
            case Instruction::Call:
                ++Counts.Control;
                break;
            default:
                ++Counts.Other;
                break;
            }
        }
    }

    outs() << "=== 指令统计: " << F.getName() << " ===\n"
           << "  总计: " << Counts.Total << "\n"
           << "  算术: " << Counts.Arithmetic << "\n"
           << "  内存: " << Counts.Memory << "\n"
           << "  控制: " << Counts.Control << "\n"
           << "  其他: " << Counts.Other << "\n";

    return Counts;
}

} // namespace llvm

13.2.2 转换 Pass — 消除冗余 store

// RedundantStoreElimPass.h
#ifndef LLVM_REDUNDANT_STORE_ELIM_H
#define LLVM_REDUNDANT_STORE_ELIM_H

#include "llvm/IR/PassManager.h"

namespace llvm {

/// Function transformation pass; run() is defined in
/// RedundantStoreElimPass.cpp.
class RedundantStoreElimPass 
    : public PassInfoMixin<RedundantStoreElimPass> {
public:
    /// Processes \p F and reports which analyses are still valid afterwards.
    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
    /// Always returns true.
    /// NOTE(review): presumably this keeps the pass manager from skipping the
    /// pass (e.g. for optnone functions) — confirm that is intended for an
    /// optimization pass.
    static bool isRequired() { return true; }
};

} // namespace llvm

#endif
// RedundantStoreElimPass.cpp
#include "RedundantStoreElimPass.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/InstIterator.h"

namespace llvm {

/// Removes stores that are overwritten later in the same basic block before
/// anything could have observed the stored value.
///
/// The scan is a single forward walk per block. A store is only removed when
/// all of the following hold:
///  - it and the overwriting store are simple (neither volatile nor atomic),
///  - both go through the very same pointer value,
///  - both store a value of the same type, so the later store fully covers
///    the earlier one,
///  - no instruction in between may read memory or unwind.
/// Anything that may read memory is treated as a potential reader of every
/// pending store, because no alias analysis is consulted.
///
/// \param F function to transform.
/// \returns PreservedAnalyses::all() when nothing was removed; otherwise only
///          CFG-based analyses are reported as preserved.
PreservedAnalyses RedundantStoreElimPass::run(
    Function &F, FunctionAnalysisManager & /*AM*/) {

    bool Changed = false;

    for (BasicBlock &BB : F) {
        // Pointer operand -> latest simple store through that pointer which
        // nothing has been able to observe yet.
        DenseMap<Value *, StoreInst *> PendingStores;

        // Only instructions *before* the current one are ever erased, so the
        // range-for iterator stays valid.
        for (Instruction &I : BB) {
            // Loads, calls, fences, volatile/ordered-atomic accesses and
            // anything that may unwind can make earlier stores visible.
            // Without alias information every pending store must be kept.
            if (I.mayReadFromMemory() || I.mayThrow())
                PendingStores.clear();

            auto *SI = dyn_cast<StoreInst>(&I);
            if (!SI || !SI->isSimple())
                continue;

            StoreInst *&Slot = PendingStores[SI->getPointerOperand()];
            if (Slot != nullptr &&
                Slot->getValueOperand()->getType() ==
                    SI->getValueOperand()->getType()) {
                // The earlier store is completely overwritten and unobserved.
                Slot->eraseFromParent();
                Changed = true;
            }
            Slot = SI;
        }
    }

    if (!Changed)
        return PreservedAnalyses::all();

    // Only instructions were deleted; blocks and edges are untouched, so all
    // CFG-based analyses (dominator tree, loop info, ...) remain valid.
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
}

} // namespace llvm

13.2.3 插件注册

// PluginRegistration.cpp
#include "CountInstructionsPass.h"
#include "RedundantStoreElimPass.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"

using namespace llvm;

// 注册插件
extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() {
    return {
        LLVM_PLUGIN_API_VERSION,
        "MyPasses",
        LLVM_VERSION_STRING,
        [](PassBuilder &PB) {
            // 注册分析 Pass
            PB.registerAnalysisRegistrationCallback(
                [](FunctionAnalysisManager &FAM) {
                    FAM.registerPass([&] { 
                        return CountInstructionsPass(); 
                    });
                });

            // 注册转换 Pass — 方式一:流水线解析回调
            PB.registerPipelineParsingCallback(
                [](StringRef Name, FunctionPassManager &FPM,
                   ArrayRef<PassBuilder::PipelineElement>) {
                    if (Name == "count-insts") {
                        FPM.addPass(CountInstsPrinterPass());
                        return true;
                    }
                    if (Name == "redundant-store-elim") {
                        FPM.addPass(RedundantStoreElimPass());
                        return true;
                    }
                    return false;
                });

            // 注册到优化级别
            PB.registerOptimizerLastEPCallback(
                [](ModulePassManager &MPM, OptimizationLevel Level) {
                    if (Level == OptimizationLevel::O2) {
                        MPM.addPass(
                            createModuleToFunctionPassAdaptor(
                                RedundantStoreElimPass()));
                    }
                });
        }
    };
}

13.2.4 构建和使用

# Build the passes as a plugin shared library.
# No LLVM component libraries are linked in: the plugin resolves LLVM symbols
# from the opt process that loads it, and linking a second copy of LLVM into
# the plugin leads to duplicate-registration errors at load time.
clang++ -std=c++17 -shared -fPIC \
    -o libmypasses.so \
    CountInstructionsPass.cpp \
    RedundantStoreElimPass.cpp \
    PluginRegistration.cpp \
    $(llvm-config --cxxflags --ldflags)

# Run the analysis printer; the IR output is discarded.
opt -load-pass-plugin=./libmypasses.so \
    -passes='count-insts' input.ll -o /dev/null

# Run the transformation and keep the result.
opt -load-pass-plugin=./libmypasses.so \
    -passes='redundant-store-elim' input.ll -o output.ll

# Combine with a built-in pass.
opt -load-pass-plugin=./libmypasses.so \
    -passes='redundant-store-elim,instcombine' input.ll -o output.ll

13.2.5 CMake 构建

# CMakeLists.txt
cmake_minimum_required(VERSION 3.20)
project(MyPasses)

find_package(LLVM REQUIRED CONFIG)

# The sources rely on C++17.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

include_directories(${LLVM_INCLUDE_DIRS})
# LLVM_DEFINITIONS is a single space-separated string; split it into a list
# before handing it to add_definitions.
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
add_definitions(${LLVM_DEFINITIONS_LIST})

# Match LLVM's RTTI setting so the plugin does not reference typeinfo symbols
# that an RTTI-less LLVM build does not provide.
if(NOT LLVM_ENABLE_RTTI)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
endif()

# Build the plugin as a loadable module.
add_library(MyPasses MODULE
    CountInstructionsPass.cpp
    RedundantStoreElimPass.cpp
    PluginRegistration.cpp
)

# Do not link LLVM component libraries into the plugin: LLVM symbols are
# resolved from the opt process that loads it. macOS needs to be told that
# unresolved symbols at link time are expected.
target_link_libraries(MyPasses
    "$<$<PLATFORM_ID:Darwin>:-undefined dynamic_lookup>")

# Always produce libMyPasses.so (not .dylib), the file name used by the
# `opt -load-pass-plugin=./libMyPasses.so` command.
set_target_properties(MyPasses PROPERTIES
    PREFIX "lib"
    SUFFIX ".so"
)
# Configure and build out of tree, then load the freshly built plugin.
mkdir build && cd build
cmake -DLLVM_DIR=/opt/llvm/lib/cmake/llvm ..
make
# Analysis only: discard the IR output, as in the earlier count-insts example.
opt -load-pass-plugin=./libMyPasses.so -passes='count-insts' test.ll -o /dev/null

13.3 使用 AST Matcher 编写 Clang Pass

// FindMagicNumbers.cpp — 查找魔术数字
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Tooling/CommonOptionsParser.h"
#include "clang/Tooling/Tooling.h"

using namespace clang;
using namespace clang::ast_matchers;
using namespace clang::tooling;

/// Match callback that reports every integer literal bound to "magic" whose
/// value is neither 0 nor 1.
///
/// An IntegerLiteral node never carries a sign: `-1` is a unary minus applied
/// to the literal `1`. The value is therefore handled as unsigned, so literals
/// such as `0xFFFFFFFF` or `0x80000000` are reported with their real value
/// instead of being skipped or printed as negative numbers.
class MagicNumberCallback : public MatchFinder::MatchCallback {
public:
    void run(const MatchFinder::MatchResult &Result) override {
        const auto *Lit = Result.Nodes.getNodeAs<IntegerLiteral>("magic");
        if (!Lit) return;

        const llvm::APInt Value = Lit->getValue();
        // 0 and 1 are usually not magic numbers; this also covers `-1`,
        // whose literal part is 1.
        if (Value.ule(1)) return;

        const SourceLocation Loc = Lit->getLocation();
        const SourceManager &SM = *Result.SourceManager;

        llvm::errs() << "警告: 可能的魔术数字 ";
        Value.print(llvm::errs(), /*isSigned=*/false);
        llvm::errs() << " at " << Loc.printToString(SM) << "\n";
    }
};

int main(int argc, const char **argv) {
    auto ExpectedParser = 
        CommonOptionsParser::create(argc, argv, llvm::cl::GeneralCategory);
    if (!ExpectedParser) return 1;

    ClangTool Tool(ExpectedParser->getCompilations(),
                   ExpectedParser->getSourcePathList());

    MagicNumberCallback Callback;
    MatchFinder Finder;

    // 匹配非模板参数中的整数字面量
    Finder.addMatcher(
        integerLiteral(
            unless(hasAncestor(templateTypeParmDecl()))
        ).bind("magic"),
        &Callback
    );

    return Tool.run(newFrontendActionFactory(&Finder).get());
}

13.4 实用 Pass 案例

13.4.1 循环统计 Pass

/// Prints statistics for every loop of a function, nested loops included.
///
/// For each loop the header name, nesting depth and block count are printed,
/// followed by the number of instructions, calls and loads found in the
/// blocks that belong directly to that loop (blocks of sub-loops are counted
/// for the sub-loop only). The IR is not modified.
class LoopStatsPass : public PassInfoMixin<LoopStatsPass> {
public:
    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
        auto &LI = AM.getResult<LoopAnalysis>(F);

        // Iterating LoopInfo directly yields only the outermost loops, so
        // nested loops would never be reported. The preorder list contains
        // every loop, parents before their children.
        for (Loop *L : LI.getLoopsInPreorder()) {
            outs() << "循环: " << L->getHeader()->getName()
                   << " 深度=" << L->getLoopDepth()
                   << " 块数=" << L->getNumBlocks() << "\n";

            unsigned InstCount = 0;
            unsigned CallCount = 0;
            unsigned LoadCount = 0;

            for (BasicBlock *BB : L->getBlocks()) {
                // Skip blocks whose innermost loop is a sub-loop; they are
                // counted when that sub-loop is visited.
                if (LI.getLoopFor(BB) != L) continue;

                for (Instruction &I : *BB) {
                    ++InstCount;
                    if (isa<CallInst>(I)) ++CallCount;
                    if (isa<LoadInst>(I)) ++LoadCount;
                }
            }

            outs() << "  指令数: " << InstCount
                   << " 调用数: " << CallCount
                   << " 加载数: " << LoadCount << "\n";
        }

        return PreservedAnalyses::all();
    }
};

13.4.2 函数大小检查 Pass

/// Warns on errs() about functions that contain more instructions than a
/// configurable limit (100 by default). The IR is never modified.
class FunctionSizeCheckPass : public PassInfoMixin<FunctionSizeCheckPass> {
    unsigned MaxInsts; // a function is reported when it exceeds this count

public:
    FunctionSizeCheckPass(unsigned Threshold = 100)
        : MaxInsts(Threshold) {}

    /// Sums the sizes of all basic blocks of \p F and emits a warning when
    /// the sum is larger than the configured limit.
    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
        unsigned NumInsts = 0;
        for (auto &Block : F)
            NumInsts += Block.size();

        // Within the limit: nothing to report.
        if (!(NumInsts > MaxInsts))
            return PreservedAnalyses::all();

        errs() << "警告: 函数 " << F.getName()
               << " 有 " << NumInsts << " 条指令"
               << " (阈值: " << MaxInsts << ")\n";
        return PreservedAnalyses::all();
    }
};

13.4.3 内存安全检查 Pass

/// Heuristic checker that warns about getelementptr instructions using a
/// negative constant index. The IR is never modified.
///
/// A negative index is not necessarily out of bounds (pointer arithmetic such
/// as `p[-1]` can be valid); the warning only flags code worth a second look.
class MemorySafetyPass : public PassInfoMixin<MemorySafetyPass> {
public:
    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
        for (auto &BB : F) {
            for (Instruction &I : BB) {
                auto *GEP = dyn_cast<GetElementPtrInst>(&I);
                if (!GEP) continue;

                for (auto &Idx : GEP->indices()) {
                    auto *CI = dyn_cast<ConstantInt>(Idx);
                    if (!CI || !CI->isNegative()) continue;

                    errs() << "警告: 负数索引 at ";
                    // DebugLoc has no stream operator; print it explicitly and
                    // fall back to the function name when the instruction
                    // carries no debug location.
                    if (const DebugLoc &DL = I.getDebugLoc())
                        DL.print(errs());
                    else
                        errs() << "<unknown location> in " << F.getName();
                    errs() << "\n";
                }
            }
        }
        return PreservedAnalyses::all();
    }
};

13.5 调试自定义 Pass

# Print the IR before and after every pass (stderr is merged into stdout).
opt -passes='my-pass' -print-before-all -print-after-all input.ll -o /dev/null 2>&1

# Verify the IR after each pass.
opt -passes='my-pass' -verify-each input.ll -o output.ll

# Debug with GDB.
gdb --args opt -passes='my-pass' input.ll -o output.ll

# Enable LLVM debug output.
# NOTE(review): -debug may only exist in assertion-enabled LLVM builds — confirm.
opt -passes='my-pass' -debug input.ll -o /dev/null 2>&1

# Show pass statistics.
# NOTE(review): -stats may require an LLVM build with statistics enabled — confirm.
opt -passes='my-pass' -stats input.ll -o /dev/null 2>&1

13.6 本章小结

步骤 | 内容
1. 设计 | 确定 Pass 类型和作用范围
2. 实现 | 继承 PassInfoMixin / AnalysisInfoMixin
3. 注册 | 通过 PassBuilder 注册插件
4. 构建 | CMake 编译为 .so
5. 测试 | opt -load-pass-plugin= 加载运行
6. 调试 | -print-before-all, -verify-each

扩展阅读

  1. Writing an LLVM Pass (New PM) — 官方教程
  2. LLVM Pass Plugin — 插件开发
  3. LLVM Programmer’s Manual — 编程手册

下一章: 第 14 章:后端目标开发 — 学习 LLVM 后端开发和 TableGen。