Caffe2 Study Notes (1): Some Basic Concepts

0. Preface

1. Basic Concepts

  • Blob: used to store data.
    • Python: can be viewed as a numpy ndarray object.
    • C++ (source): "a typed pointer that can store any type of C++ objects".
  • Workspace: manages all Blobs.
    • Operations:
      • Blob operations (fetching, existence checks, feeding, etc.).
      • Operations on the current Workspace itself (switching, resetting, etc.).
      • Running computation graphs.
    • Python (source): the bindings show that nearly all operations are dispatched to the underlying C++ code via pybind11.
    • C++ (source).
  • Operator: can be viewed as a function that takes some data as input and produces some data as output, e.g. layers and numerical operations.
    • Feels like the same concept as an op in TensorFlow.
    • See here for the basic structure; creating an Operator essentially just creates a protobuf object.
    • C++: "they all derive from a common interface, and are registered by type, so that we can call different operators during runtime".
  • Nets: a computation graph composed of Operators.
    • Python (source): "Caffe2's core.Net is a wrapper class around a NetDef protocol buffer".
    • C++ (source).
  • Other notes:
    • Like TensorFlow, Caffe2's overall workflow is: first define the computation graph (built from ops), then run it. A minimal sketch follows.
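
A minimal sketch of this define-then-run flow (the blob and net names are just illustrative; the same APIs are covered in detail in the sections below):

from caffe2.python import core, workspace
import numpy as np

# Phase 1: define the graph; nothing is computed yet
net = core.Net("toy_net")
net.Relu(["X"], ["Y"])  # appends a Relu op to the underlying NetDef

# Phase 2: run the graph against a workspace
workspace.FeedBlob("X", np.random.randn(2, 3).astype(np.float32))
workspace.RunNetOnce(net)
print(workspace.FetchBlob("Y"))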

2. Blob & Workspace Operations

2.1. Python

  • Import the libraries
from caffe2.python import core, workspace, model_helper
from caffe2.proto import caffe2_pb2
import numpy as np
  • Blob and Workspace operations
# A blob's underlying storage can be a numpy ndarray object
X = np.random.randn(2, 3).astype(np.float32)

# List the blobs currently in the workspace
print("Current blobs in the workspace: {}".format(workspace.Blobs()))

# Does the workspace have the specified blob?
print("Workspace has blob 'X'? {}".format(workspace.HasBlob("X")))
workspace.FeedBlob("X", X)  # feed a blob into the workspace

# Fetch a blob from the workspace
# Fetching a blob that does not exist raises an error
print("Fetched X:\n{}".format(workspace.FetchBlob("X")))
try:
    workspace.FetchBlob("invincible_pink_unicorn")
except RuntimeError as err:
    print(err)
# List the blobs currently in the workspace
print("Current blobs in the workspace: {}".format(workspace.Blobs()))
# Does the workspace have the specified blob?
print("Workspace has blob 'X'? {}".format(workspace.HasBlob("X")))
  • Workspace-level operations
# Inspect the current workspace
print("Current workspace: {}".format(workspace.CurrentWorkspace()))

# List the blobs in the current workspace
print("Current blobs in the workspace: {}".format(workspace.Blobs()))

# Switch workspace (True = create it if it does not exist)
workspace.SwitchWorkspace("gutentag", True)

# Inspect the current workspace
print("Current workspace: {}".format(workspace.CurrentWorkspace()))

# List the blobs in the current workspace (the new workspace is empty)
print("Current blobs in the workspace: {}".format(workspace.Blobs()))

# Switch back to the default workspace
workspace.SwitchWorkspace("default")

# Inspect the current workspace
print("Current workspace: {}".format(workspace.CurrentWorkspace()))

# List the blobs in the current workspace ("X" is still here)
print("Current blobs in the workspace: {}".format(workspace.Blobs()))

# Clear all contents of the current workspace
workspace.ResetWorkspace()

# List the blobs in the current workspace
print("Current blobs in the workspace after reset: {}".format(workspace.Blobs()))

2.2. C++

  • Blob and Workspace operations
#include <caffe2/core/init.h>
#include <caffe2/core/operator.h>
#include <caffe2/core/operator_gradient.h>
#include <caffe2/proto/caffe2.pb.h>
namespace caffe2 {
    void print(const Blob* blob, const std::string& name) {
        auto tensor = blob->Get<TensorCPU>();
        const auto& data = tensor.data<float>();
        std::cout << name << "(" << tensor.dims() << "): " 
            << std::vector<float>(data, data+tensor.size())
            << std::endl;
    }

    void run() {
        //define blobs with std::vector
        std::vector<float> x(4*3*2);
        for(float &v : x ) {
            v = (float)rand() / RAND_MAX - 0.5;
        }

        // define workspace
        Workspace workspace;

        // print all blobs
        std::cout << "current workspace has blobs" << std::endl;
        std::vector<std::string> blobs = workspace.Blobs();
        for(std::string &s:blobs){
            std::cout << s << std::endl;
        }

        //feed blob
        auto tensor = workspace.CreateBlob("X")->GetMutable<TensorCPU>();
        TensorCPU value = TensorCPU({4, 3, 2}, x, NULL);
        tensor->ResizeLike(value);
        tensor->ShareData(value);

        // print all blobs
        std::cout << "current workspace has blobs" << std::endl;
        blobs = workspace.Blobs();
        for(std::string &s:blobs){
            std::cout << s << std::endl;
        }

        // fetch blob
        auto tensor2 = workspace.GetBlob("X");
        print(tensor2, "X");

        // has blob
        std::cout << "current workspace has blob \"X\"?" << workspace.HasBlob("X") << std::endl;
    }
}
int main(int argc, char** argv) {
    caffe2::GlobalInit(&argc, &argv);
    caffe2::run();
    google::protobuf::ShutdownProtobufLibrary();
    return 0;
}
  • Operations on the Workspace itself
    • Switching workspaces essentially creates a new Workspace object.
    • Resetting a workspace essentially calls std::unique_ptr::reset(), replacing the old object with a new one and destroying the old one.
    • The C++ source keeps a map (std::string -> std::unique_ptr<Workspace>) that stores every workspace together with its name.

3. Operator Operations

3.1. Python

from caffe2.python import core, workspace, model_helper
from caffe2.proto import caffe2_pb2
import numpy as np

op = core.CreateOperator(
    "Relu", # The type of operator that we want to run
    ["X"], # A list of input blobs by their names
    ["Y"], # A list of output blobs by their names
)

# an op is essentially a protobuf object (an OperatorDef)
print("Type of the created op is: {}".format(type(op)))
print("Content:\n")
print(str(op))

# define the input data
workspace.FeedBlob("X", np.random.randn(2, 3).astype(np.float32))

# run the op
workspace.RunOperatorOnce(op)

# inspect the results
print("Current blobs in the workspace: {}\n".format(workspace.Blobs()))
print("X:\n{}\n".format(workspace.FetchBlob("X")))
print("Y:\n{}\n".format(workspace.FetchBlob("Y")))
print("Expected:\n{}\n".format(np.maximum(workspace.FetchBlob("X"), 0)))

3.2. C++

#include <caffe2/core/init.h>
#include <caffe2/core/operator.h>
#include <caffe2/core/operator_gradient.h>
#include <caffe2/proto/caffe2.pb.h>
namespace caffe2 {
    void print(const Blob* blob, const std::string& name) {
        auto tensor = blob->Get<TensorCPU>();
        const auto& data = tensor.data<float>();
        std::cout << name << "(" << tensor.dims() << "): " 
            << std::vector<float>(data, data+tensor.size())
            << std::endl;
    }
    void run() {
        //define blobs with std::vector
        std::vector<float> x(4*3*2);
        for(float &v : x ) {
            v = (float)rand() / RAND_MAX - 0.5;
        }
        // define workspace
        Workspace workspace;
        //feed blob
        auto tensor = workspace.CreateBlob("X")->GetMutable<TensorCPU>();
        TensorCPU value = TensorCPU({4, 3, 2}, x, NULL);
        tensor->ResizeLike(value);
        tensor->ShareData(value);
        // create an OperatorDef and run it with the workspace
        // (a stack-allocated OperatorDef; no need to new it)
        OperatorDef op_def;
        op_def.set_type("Relu");
        op_def.add_input("X");
        op_def.add_output("Y");
        // run op
        workspace.RunOperatorOnce(op_def);
        // print op output
        print(workspace.GetBlob("Y"), "Y");
    }
}

int main(int argc, char** argv) {
    caffe2::GlobalInit(&argc, &argv);
    caffe2::run();
    google::protobuf::ShutdownProtobufLibrary();
    return 0;
}

4. Net

4.1. Python

  • Using core.Net
from caffe2.python import core, workspace, model_helper
from caffe2.proto import caffe2_pb2
import numpy as np

# create a new Net
net = core.Net("my_first_net")

# inspect the net's proto
print("Current network proto:\n\n{}".format(net.Proto()))

# Creating an Operator through the net is equivalent to:
# op = core.CreateOperator("SomeOp", ...)
# net.Proto().op.extend([op])
X = net.GaussianFill([], ["X"], mean=0.0, std=1.0, shape=[2, 3], run_once=0)

# inspect the net's proto
print("New network proto:\n\n{}".format(net.Proto()))

# When an Operator is created through the net, the return value is a BlobReference
# A BlobReference stores the blob name and the net that created it
print("Type of X is: {}".format(type(X)))
print("The blob name is: {}".format(str(X)))
W = net.GaussianFill([], ["W"], mean=0.0, std=1.0, shape=[5, 3], run_once=0)
b = net.ConstantFill([], ["b"], shape=[5,], value=1.0, run_once=0)

# An Operator can also be created directly from a BlobReference, which then acts as an input
# The following is equivalent to Y = net.FC([X, W, b], ["Y"])
Y = X.FC([W, b], ["Y"])

# A Net is run through the workspace
# Option 1: workspace.RunNetOnce()
workspace.ResetWorkspace()
print("Current blobs in the workspace: {}".format(workspace.Blobs()))
workspace.RunNetOnce(net)
print("Blobs in the workspace after execution: {}".format(workspace.Blobs()))

# Let's dump the contents of the blobs
for name in workspace.Blobs():
    print("{}:\n{}".format(name, workspace.FetchBlob(name)))

# Option 2: workspace.CreateNet() first, then workspace.RunNet()
workspace.ResetWorkspace()
print("Current blobs in the workspace: {}".format(workspace.Blobs()))
workspace.CreateNet(net)
workspace.RunNet(net.Proto().name)
print("Blobs in the workspace after execution: {}".format(workspace.Blobs()))
for name in workspace.Blobs():
    print("{}:\n{}".format(name, workspace.FetchBlob(name)))
  • ModelHelper
from caffe2.python import core, workspace, model_helper
from caffe2.proto import caffe2_pb2
import numpy as np

# generate some random data before building the model
data = np.random.rand(16, 100).astype(np.float32)
label = (np.random.rand(16) * 10).astype(np.int32)
workspace.FeedBlob("data", data)
workspace.FeedBlob("label", label)

# build the model with ModelHelper
m = model_helper.ModelHelper(name="my first net")
weight = m.param_init_net.XavierFill([], 'fc_w', shape=[10, 100])
bias = m.param_init_net.ConstantFill([], 'fc_b', shape=[10, ])
fc_1 = m.net.FC(["data", "fc_w", "fc_b"], "fc1")
pred = m.net.Sigmoid(fc_1, "pred")
softmax, loss = m.net.SoftmaxWithLoss([pred, "label"], ["softmax", "loss"])
m.AddGradientOperators([loss])
print(m.net.Proto())

# ModelHelper creates two nets:
# m.param_init_net: run only once, to initialize the parameters
# m.net: the net actually used for training/inference
# Note: AddGradientOperators only added gradient computation; there is no
# parameter-update step, so the loop below never actually learns anything
# (see the sketch after this block)
workspace.RunNetOnce(m.param_init_net)
workspace.CreateNet(m.net)
for _ in range(100):
    data = np.random.rand(16, 100).astype(np.float32)
    label = (np.random.rand(16) * 10).astype(np.int32)
    workspace.FeedBlob("data", data)
    workspace.FeedBlob("label", label)
    workspace.RunNet(m.name, 10)
print(workspace.FetchBlob("softmax"))
print(workspace.FetchBlob("loss"))
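
To address the note above, here is a hedged sketch of how actual training could be wired up, using brew (which registers the FC parameters with the model) and optimizer.build_sgd (which appends parameter-update ops). It reuses the data/label blobs fed earlier; all names are illustrative:

from caffe2.python import brew, optimizer

m2 = model_helper.ModelHelper(name="trainable_net")
fc1 = brew.fc(m2, "data", "fc1", dim_in=100, dim_out=10)  # registers fc1_w / fc1_b as params
pred = m2.net.Sigmoid(fc1, "pred")
softmax, loss = m2.net.SoftmaxWithLoss([pred, "label"], ["softmax", "loss"])
m2.AddGradientOperators([loss])
optimizer.build_sgd(m2, base_learning_rate=0.1)  # appends SGD update ops for the registered params

workspace.RunNetOnce(m2.param_init_net)
workspace.CreateNet(m2.net)
workspace.RunNet(m2.name, 10)  # now the loss should actually decrease
print(workspace.FetchBlob("loss"))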

4.2. C++

#include <caffe2/core/init.h>
#include <caffe2/core/operator.h>
#include <caffe2/core/operator_gradient.h>
#include <caffe2/proto/caffe2.pb.h>
#include <caffe2/utils/proto_utils.h>

#include <algorithm>  // std::reverse

namespace caffe2 {
    void print(const Blob* blob, const std::string& name) {
        auto tensor = blob->Get<TensorCPU>();
        const auto& data = tensor.data<float>();
        std::cout << name << "(" << tensor.dims() << "): " 
            << std::vector<float>(data, data+tensor.size())
            << std::endl;
    }

    void run() {
        // >>> data = np.random.rand(16, 100).astype(np.float32)
        std::vector<float> data(16 * 100);
        for (auto& v : data) {
            v = (float)rand() / RAND_MAX;
        }

        // >>> label = (np.random.rand(16) * 10).astype(np.int32)
        std::vector<int> label(16);
        for (auto& v : label) {
            v = rand() % 10;  // integer-safe; 10 * rand() could overflow int
        }

        // define workspace (this declaration is required for the calls below)
        Workspace workspace;

        // >>> workspace.FeedBlob("data", data)
        {
            auto tensor = workspace.CreateBlob("data")->GetMutable<TensorCPU>();
            auto value = TensorCPU({16, 100}, data, NULL);
            tensor->ResizeLike(value);
            tensor->ShareData(value);
        }

        // >>> workspace.FeedBlob("label", label)
        {
            auto tensor = workspace.CreateBlob("label")->GetMutable<TensorCPU>();
            auto value = TensorCPU({16}, label, NULL);
            tensor->ResizeLike(value);
            tensor->ShareData(value);
        }

        // >>> m = model_helper.ModelHelper(name="my first net")
        NetDef initModel;
        initModel.set_name("my first net_init");
        NetDef predictModel;
        predictModel.set_name("my first net");

        // >>> weight = m.param_init_net.XavierFill([], 'fc_w', shape=[10, 100])
        {
            auto op = initModel.add_op();
            op->set_type("XavierFill");
            auto arg = op->add_arg();
            arg->set_name("shape");
            arg->add_ints(10);
            arg->add_ints(100);
            op->add_output("fc_w");
        }

        // >>> bias = m.param_init_net.ConstantFill([], 'fc_b', shape=[10, ])
        {
            auto op = initModel.add_op();
            op->set_type("ConstantFill");
            auto arg = op->add_arg();
            arg->set_name("shape");
            arg->add_ints(10);
            op->add_output("fc_b");
        }
        std::vector<OperatorDef*> gradient_ops;

        // >>> fc_1 = m.net.FC(["data", "fc_w", "fc_b"], "fc1")
        {
            auto op = predictModel.add_op();
            op->set_type("FC");
            op->add_input("data");
            op->add_input("fc_w");
            op->add_input("fc_b");
            op->add_output("fc1");
            gradient_ops.push_back(op);
        }

        // >>> pred = m.net.Sigmoid(fc_1, "pred")
        {
            auto op = predictModel.add_op();
            op->set_type("Sigmoid");
            op->add_input("fc1");
            op->add_output("pred");
            gradient_ops.push_back(op);
        }

        // >>> softmax, loss = m.net.SoftmaxWithLoss([pred, "label"], ["softmax", "loss"])
        {
            auto op = predictModel.add_op();
            op->set_type("SoftmaxWithLoss");
            op->add_input("pred");
            op->add_input("label");
            op->add_output("softmax");
            op->add_output("loss");
            gradient_ops.push_back(op);
        }

        // >>> m.AddGradientOperators([loss])
        {
            auto op = predictModel.add_op();
            op->set_type("ConstantFill");
            auto arg = op->add_arg();
            arg->set_name("value");
            arg->set_f(1.0);
            op->add_input("loss");
            op->add_output("loss_grad");
            op->set_is_gradient_op(true);
        }
        std::reverse(gradient_ops.begin(), gradient_ops.end());
        for (auto op : gradient_ops) {
            vector<GradientWrapper> output(op->output_size());
            for (auto i = 0; i < output.size(); i++) {
                output[i].dense_ = op->output(i) + "_grad";
            }
            GradientOpsMeta meta = GetGradientForOp(*op, output);
            auto grad = predictModel.add_op();
            grad->CopyFrom(meta.ops_[0]);
            grad->set_is_gradient_op(true);
        }

        // >>> print(str(m.net.Proto()))
        std::cout << std::endl;
        // the print() helper above only handles blobs; use ProtoDebugString for NetDefs
        std::cout << ProtoDebugString(predictModel) << std::endl;

        // >>> print(str(m.param_init_net.Proto()))
        std::cout << std::endl;
        std::cout << ProtoDebugString(initModel) << std::endl;

        // >>> workspace.RunNetOnce(m.param_init_net)
        CAFFE_ENFORCE(workspace.RunNetOnce(initModel));

        // >>> workspace.CreateNet(m.net)
        CAFFE_ENFORCE(workspace.CreateNet(predictModel));

        // >>> for j in range(0, 100):
        for (auto i = 0; i < 100; i++) {
            // >>> data = np.random.rand(16, 100).astype(np.float32)
            std::vector<float> data(16 * 100);
            for (auto& v : data) {
                v = (float)rand() / RAND_MAX;
            }
            // >>> label = (np.random.rand(16) * 10).astype(np.int32)
            std::vector<int> label(16);
            for (auto& v : label) {
                v = rand() % 10;  // integer-safe; 10 * rand() could overflow int
            }
            // >>> workspace.FeedBlob("data", data)
            {
                auto tensor = workspace.GetBlob("data")->GetMutable<TensorCPU>();
                auto value = TensorCPU({16, 100}, data, NULL);
                tensor->ShareData(value);
            }
            // >>> workspace.FeedBlob("label", label)
            {
                auto tensor = workspace.GetBlob("label")->GetMutable<TensorCPU>();
                auto value = TensorCPU({16}, label, NULL);
                tensor->ShareData(value);
            }
            // >>> workspace.RunNet(m.name, 10)   # run for 10 times
            for (auto j = 0; j < 10; j++) {
                CAFFE_ENFORCE(workspace.RunNet(predictModel.name()));
                // std::cout << "step: " << i << " loss: ";
                // print(*(workspace.GetBlob("loss")));
                // std::cout << std::endl;
            }
        }
        std::cout << std::endl;
        // >>> print(workspace.FetchBlob("softmax"))
        print(workspace.GetBlob("softmax"), "softmax");
        std::cout << std::endl;
        // >>> print(workspace.FetchBlob("loss"))
        print(workspace.GetBlob("loss"), "loss");
    }
}

int main(int argc, char** argv) {
    caffe2::GlobalInit(&argc, &argv);
    caffe2::run();
    google::protobuf::ShutdownProtobufLibrary();
    return 0;
}
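
As a footnote to the manual gradient construction above (the GetGradientForOp loop): the Python side exposes the same registry through core.GradientRegistry. A hedged sketch:

from caffe2.python import core

op = core.CreateOperator("Relu", ["X"], ["Y"])
# look up the registered gradient for this op, naming Y's gradient blob "Y_grad"
grad_ops, g_input = core.GradientRegistry.GetGradientForOp(op, ["Y_grad"])
for g in grad_ops:
    print(g)  # a ReluGradient op that produces X_grad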
