# This script generates a MobileNetV1 train/val network definition (prototxt)
# for Caffe, built programmatically through the caffe_pb2 protobuf API.
#
################################################################################
from caffe.proto import caffe_pb2
from google.protobuf import text_format
import argparse
def Conv2dWithoutBias(net, bottom, top, dim_out, stride=1, kernel=1, pad=0):
    """ A convolution block consisting of a bias-free 2d-convolution followed
    by batchnorm, scale and relu layers
    Args:
        net: the caffe NetParameter to append layers to
        bottom: name of the input blob for the conv2d block
        top: name of the output blob for the conv2d block
        dim_out: the number of filters
        stride: the stride for the convolution layer
        kernel: the kernel size for the convolution layer
        pad: the padding for the convolution layer
    Return:
        the output blob name for the conv2d block
    """
    # 1) convolution layer; bias is omitted because the batchnorm that
    # follows would cancel it anyway
    conv_layer = caffe_pb2.LayerParameter()
    conv_layer.type = "Convolution"
    conv_layer.name = top
    conv_layer.bottom.extend([bottom])
    conv_layer.top.extend([top])
    conv_layer_param = caffe_pb2.ParamSpec()
    conv_layer_param.lr_mult = 1.0
    conv_layer_param.decay_mult = 1.0
    conv_layer.param.extend([conv_layer_param])
    conv_param = conv_layer.convolution_param
    conv_param.num_output = dim_out
    conv_param.bias_term = False
    conv_param.kernel_size.append(kernel)
    conv_param.stride.append(stride)
    conv_param.pad.append(pad)
    conv_param.group = 1
    conv_param.weight_filler.type = "msra"
    ret = "{}/bn".format(top)
    # 2) batchnorm layer; its three blobs (running mean, running variance,
    # moving-average factor) are updated by running statistics rather than
    # by SGD, hence lr_mult/decay_mult = 0
    bn_layer = caffe_pb2.LayerParameter()
    bn_layer.name = "{}/bn".format(top)
    bn_layer.type = "BatchNorm"
    bn_layer_param = caffe_pb2.ParamSpec()
    bn_layer_param.lr_mult = 0.
    bn_layer_param.decay_mult = 0.
    bn_layer.param.extend([bn_layer_param] * 3)
    bn_layer.bottom.extend([top])
    bn_layer.top.extend([ret])
    # 3) scale layer: Caffe's BatchNorm only normalizes, so the learned
    # scale/shift (gamma/beta) comes from a separate Scale layer with bias
    scale_layer = caffe_pb2.LayerParameter()
    scale_layer.name = "{}/scale".format(top)
    scale_layer.type = "Scale"
    scale_layer.scale_param.bias_term = True
    scale_layer.bottom.extend([ret])
    scale_layer.top.extend([ret])
    # 4) in-place ReLU activation
    relu_layer = caffe_pb2.LayerParameter()
    relu_layer.name = "{}/relu".format(top)
    relu_layer.type = "ReLU"
    relu_layer.bottom.extend([ret])
    relu_layer.top.extend([ret])
    net.layer.extend([conv_layer, bn_layer, scale_layer, relu_layer])
    return ret
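# For orientation, a call such as Conv2dWithoutBias(net, "data", "conv1", 32,
# kernel=3, stride=2, pad=1) emits four layers; the serialized prototxt looks
# roughly like this (a sketch, with most fields elided):
#   layer { name: "conv1"       type: "Convolution" bottom: "data"     top: "conv1" }
#   layer { name: "conv1/bn"    type: "BatchNorm"   bottom: "conv1"    top: "conv1/bn" }
#   layer { name: "conv1/scale" type: "Scale"       bottom: "conv1/bn" top: "conv1/bn" }
#   layer { name: "conv1/relu"  type: "ReLU"        bottom: "conv1/bn" top: "conv1/bn" }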
def DepthwiseConvWithoutBias(net, bottom, top, dim_out, kernel=1, stride=1, pad=0):
    """ A depthwise convolution block consisting of a bias-free depthwise
    convolution followed by batchnorm, scale and relu layers
    Args:
        net: the caffe NetParameter to append layers to
        bottom: name of the input blob for the block
        top: name of the output blob for the block
        dim_out: the number of filters (must equal the input channel count)
        kernel, stride, pad: the convolution layer parameters
    Return:
        the output blob name for the depthwise conv block
    """
    conv_layer = caffe_pb2.LayerParameter()
    # A dedicated "DepthwiseConvolution" layer type (available in some Caffe
    # forks) runs faster, but plain Convolution with group == num_output is
    # the portable way to express a depthwise convolution.
    # conv_layer.type = "DepthwiseConvolution"
    conv_layer.type = "Convolution"
    conv_layer.name = top
    conv_layer.bottom.extend([bottom])
    conv_layer.top.extend([top])
    conv_layer_param = caffe_pb2.ParamSpec()
    conv_layer_param.lr_mult = 1.0
    conv_layer_param.decay_mult = 1.0
    conv_layer.param.extend([conv_layer_param])
    conv_param = conv_layer.convolution_param
    conv_param.num_output = dim_out
    conv_param.bias_term = False
    conv_param.kernel_size.append(kernel)
    conv_param.stride.append(stride)
    conv_param.pad.append(pad)
    conv_param.group = dim_out  # one group per channel makes it depthwise
    conv_param.weight_filler.type = "msra"
    ret = "{}/bn".format(top)
    # batchnorm + scale + relu, identical to Conv2dWithoutBias
    bn_layer = caffe_pb2.LayerParameter()
    bn_layer.name = "{}/bn".format(top)
    bn_layer.type = "BatchNorm"
    bn_layer_param = caffe_pb2.ParamSpec()
    bn_layer_param.lr_mult = 0.
    bn_layer_param.decay_mult = 0.
    bn_layer.param.extend([bn_layer_param] * 3)
    bn_layer.bottom.extend([top])
    bn_layer.top.extend([ret])
    scale_layer = caffe_pb2.LayerParameter()
    scale_layer.name = "{}/scale".format(top)
    scale_layer.type = "Scale"
    scale_layer.scale_param.bias_term = True
    scale_layer.bottom.extend([ret])
    scale_layer.top.extend([ret])
    relu_layer = caffe_pb2.LayerParameter()
    relu_layer.name = "{}/relu".format(top)
    relu_layer.type = "ReLU"
    relu_layer.bottom.extend([ret])
    relu_layer.top.extend([ret])
    net.layer.extend([conv_layer, bn_layer, scale_layer, relu_layer])
    return ret
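# Why depthwise separable convolutions: a depthwise 3x3 plus a pointwise 1x1
# replaces one standard 3x3 convolution. Rough multiply count per output
# position, with C input and C output channels:
#   standard 3x3 conv:   9 * C * C
#   depthwise 3x3 conv:  9 * C
#   pointwise 1x1 conv:  C * C
# so the separable pair costs (9*C + C*C) / (9*C*C) = 1/C + 1/9 of the
# standard conv, roughly an 8-9x reduction for large C (Howard et al., 2017).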
def DataLayer(net, train_data, val_data,
              crop_size=224,
              scale=0.017,
              mean_value=[104.0, 117.0, 123.0],
              train_batch_size=32, val_batch_size=32):
    """ Generate the input data layers for the TRAIN and TEST phases
    Args:
        net: the caffe NetParameter to append layers to
        train_data: database (lmdb or leveldb) holding the training data
        val_data: database (lmdb or leveldb) holding the validation data
        crop_size: input crop size, e.g. 224 or 299
        scale: factor applied to each pixel after subtracting the mean value
        mean_value: per-channel mean subtracted from the input data
        train_batch_size: the batch size for the training phase
        val_batch_size: the batch size for the validation phase
    Return:
        the output blob names ("data", "label") of the data layers
    """
    # train layer, selected only in the TRAIN phase via its include rule
    train_layer = caffe_pb2.LayerParameter()
    train_layer.name = "data"
    train_layer.type = "Data"
    train_layer.top.extend(["data", "label"])
    train_state_rule = caffe_pb2.NetStateRule()
    train_state_rule.phase = caffe_pb2.TRAIN
    train_layer.include.extend([train_state_rule])
    train_trans_param = train_layer.transform_param
    train_trans_param.mirror = True
    train_trans_param.crop_size = crop_size
    train_trans_param.scale = scale
    train_trans_param.mean_value.extend(mean_value)
    train_data_param = train_layer.data_param
    train_data_param.batch_size = train_batch_size
    train_data_param.source = train_data
    train_data_param.backend = caffe_pb2.DataParameter.LMDB
    # validation layer, selected only in the TEST phase (no mirroring)
    val_layer = caffe_pb2.LayerParameter()
    val_layer.name = "data"
    val_layer.type = "Data"
    val_layer.top.extend(["data", "label"])
    val_state_rule = caffe_pb2.NetStateRule()
    val_state_rule.phase = caffe_pb2.TEST
    val_layer.include.extend([val_state_rule])
    val_trans_param = val_layer.transform_param
    val_trans_param.mirror = False
    val_trans_param.crop_size = crop_size
    val_trans_param.scale = scale
    val_trans_param.mean_value.extend(mean_value)
    val_data_param = val_layer.data_param
    val_data_param.batch_size = val_batch_size
    val_data_param.source = val_data
    val_data_param.backend = caffe_pb2.DataParameter.LMDB
    net.layer.extend([train_layer, val_layer])
    return "data", "label"
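# Both layers share the "data"/"label" top names; Caffe selects one of them
# per phase through the include rule, so the serialized prototxt contains
# (sketch, most fields elided):
#   layer { name: "data" type: "Data" include { phase: TRAIN } ... }
#   layer { name: "data" type: "Data" include { phase: TEST } ... }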
def PoolLayer(net, bottom, top,
              global_pooling=False,
              kernel=1, pool_method=0, stride=1, pad=0):
    """ Add a pooling layer, either MAX pooling or AVE pooling
    Args:
        net: the caffe NetParameter to append layers to
        bottom: the input blob name
        top: the output blob name
        pool_method: the pooling method as a PoolingParameter enum value
            (MAX = 0, AVE = 1)
        global_pooling: if True, pool over the whole spatial extent
        kernel, stride, pad: the pooling parameters (ignored when
            global_pooling is set)
    Return:
        the output blob name of the pooling layer
    """
    pool_layer = caffe_pb2.LayerParameter()
    pool_layer.name = top
    pool_layer.type = "Pooling"
    pool_layer.bottom.extend([bottom])
    pool_layer.top.extend([top])
    pool_param = pool_layer.pooling_param
    pool_param.pool = pool_method
    if global_pooling:
        pool_param.global_pooling = True
    else:
        # unlike ConvolutionParameter, these fields are scalars, not repeated
        pool_param.kernel_size = kernel
        pool_param.stride = stride
        pool_param.pad = pad
    net.layer.extend([pool_layer])
    return top
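# Example: a global average pool over the final 7x7 feature map collapses it
# to N x C x 1 x 1, which is how this helper is used below:
#   PoolLayer(net, "conv10/bn", "pool11", global_pooling=True, pool_method=1)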
def FCLayer(net, bottom, top, dim_out):
    """ Add a fully-connected layer
    Args:
        net: the caffe NetParameter to append layers to
        bottom: the input blob name for the fc layer
        top: the output blob name for the fc layer
        dim_out: the number of output units
    Return:
        the output blob name
    """
    fc_layer = caffe_pb2.LayerParameter()
    fc_layer.name = top
    fc_layer.type = "InnerProduct"
    fc_layer.bottom.extend([bottom])
    fc_layer.top.extend([top])
    fc_param = fc_layer.inner_product_param
    fc_param.num_output = dim_out
    # without an explicit filler Caffe defaults to constant(0) weights,
    # which would keep the classifier stuck at zero during training
    fc_param.weight_filler.type = "msra"
    fc_param.bias_filler.type = "constant"
    fc_param.bias_filler.value = 0.0
    net.layer.extend([fc_layer])
    return top
def SoftmaxWithLossLayer(net, input_data, input_label):
    """ Add a softmaxwithloss layer computing the classification loss
    Args:
        net: the caffe NetParameter to append layers to
        input_data: string, name of the prediction (logits) blob
        input_label: string, name of the label blob
    """
    softmax_layer = caffe_pb2.LayerParameter()
    softmax_layer.name = "loss"
    softmax_layer.type = "SoftmaxWithLoss"
    softmax_layer.bottom.extend([input_data, input_label])
    softmax_layer.top.extend(["loss/loss"])
    net.layer.extend([softmax_layer])
def AccuracyLayer(net, input_data, input_label):
    """ Add top-1 and top-5 accuracy layers for the imagenet dataset
    Args:
        net: the caffe NetParameter to append layers to
        input_data: prediction blob for accuracy
        input_label: label blob for accuracy
    """
    # top1 layer
    top1_layer = caffe_pb2.LayerParameter()
    top1_layer.name = "accuracy/top1"
    top1_layer.type = "Accuracy"
    top1_layer.bottom.extend([input_data, input_label])
    top1_layer.top.extend(["acc@1"])
    top1_acc_param = top1_layer.accuracy_param
    top1_acc_param.top_k = 1
    # top5 layer
    top5_layer = caffe_pb2.LayerParameter()
    top5_layer.name = "accuracy/top5"
    top5_layer.type = "Accuracy"
    top5_layer.bottom.extend([input_data, input_label])
    top5_layer.top.extend(["acc@5"])
    top5_acc_param = top5_layer.accuracy_param
    top5_acc_param.top_k = 5
    net.layer.extend([top1_layer, top5_layer])
def generate_mobilenetv1(args):
    net = caffe_pb2.NetParameter()
    net.name = "mobilenetv1_{}_{}".format(args.crop_size, args.shrink)
    # add data layer
    data, label = DataLayer(net, args.train_data, args.val_data,
                            crop_size=args.crop_size,
                            train_batch_size=args.train_batch_size,
                            val_batch_size=args.val_batch_size)
    # first conv layer
    conv1 = Conv2dWithoutBias(net, data, "conv1", int(32*args.shrink),
                              kernel=3, stride=2, pad=1)
    # depthwise conv layer and 1x1 conv layer (conv2)
    depconv2 = DepthwiseConvWithoutBias(net, conv1, "conv2/dw", int(32*args.shrink),
                                        kernel=3, stride=1, pad=1)
    conv2 = Conv2dWithoutBias(net, depconv2, "conv2", int(64*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv3)
    depconv3 = DepthwiseConvWithoutBias(net, conv2, "conv3/dw", int(64*args.shrink),
                                        kernel=3, stride=2, pad=1)
    conv3 = Conv2dWithoutBias(net, depconv3, "conv3", int(128*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv4)
    depconv4 = DepthwiseConvWithoutBias(net, conv3, "conv4/dw", int(128*args.shrink),
                                        kernel=3, stride=1, pad=1)
    conv4 = Conv2dWithoutBias(net, depconv4, "conv4", int(128*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv5)
    depconv5 = DepthwiseConvWithoutBias(net, conv4, "conv5/dw", int(128*args.shrink),
                                        kernel=3, stride=2, pad=1)
    conv5 = Conv2dWithoutBias(net, depconv5, "conv5", int(256*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv6)
    depconv6 = DepthwiseConvWithoutBias(net, conv5, "conv6/dw", int(256*args.shrink),
                                        kernel=3, stride=1, pad=1)
    conv6 = Conv2dWithoutBias(net, depconv6, "conv6", int(256*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv7)
    depconv7 = DepthwiseConvWithoutBias(net, conv6, "conv7/dw", int(256*args.shrink),
                                        kernel=3, stride=2, pad=1)
    conv7 = Conv2dWithoutBias(net, depconv7, "conv7", int(512*args.shrink),
                              kernel=1, stride=1, pad=0)
    # five repeated depthwise + 1x1 conv blocks at 512 channels (conv8)
    conv8 = conv7
    for idx in range(5):
        depconv8 = DepthwiseConvWithoutBias(net, conv8, "conv8/{}/dw".format(idx),
                                            int(512*args.shrink), kernel=3, stride=1, pad=1)
        conv8 = Conv2dWithoutBias(net, depconv8, "conv8/{}".format(idx),
                                  int(512*args.shrink), kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv9)
    depconv9 = DepthwiseConvWithoutBias(net, conv8, "conv9/dw", int(512*args.shrink),
                                        kernel=3, stride=2, pad=1)
    conv9 = Conv2dWithoutBias(net, depconv9, "conv9", int(1024*args.shrink),
                              kernel=1, stride=1, pad=0)
    # depthwise conv layer and 1x1 conv layer (conv10)
    depconv10 = DepthwiseConvWithoutBias(net, conv9, "conv10/dw", int(1024*args.shrink),
                                         kernel=3, stride=1, pad=1)
    conv10 = Conv2dWithoutBias(net, depconv10, "conv10", int(1024*args.shrink),
                               kernel=1, stride=1, pad=0)
    # global average pool
    pool11 = PoolLayer(net, conv10, "pool11", global_pooling=True, pool_method=1)
    # fc layer
    fc12 = FCLayer(net, pool11, "fc12", 1000)
    # softmax and accuracy layer
    SoftmaxWithLossLayer(net, fc12, label)
    AccuracyLayer(net, fc12, label)
    with open(args.output_net, "w") as net_stream:
        net_stream.write(text_format.MessageToString(net))
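# Spatial resolution at crop_size 224 for reference: conv1 downsamples to
# 112x112, and the stride-2 depthwise layers (conv3/dw, conv5/dw, conv7/dw,
# conv9/dw) halve it further to 56, 28, 14 and finally 7x7 before the global
# average pool, matching Table 1 of the MobileNet paper (Howard et al., 2017).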
def parse_args():
    parser = argparse.ArgumentParser(description="generate mobilenetv1")
    parser.add_argument("--output_net", type=str, required=True,
                        help="output file containing the network definition")
    parser.add_argument("--train_data", type=str,
                        default="/mnt/disk1/zhibin/experiment_data/imagenet/caffe_lmdb/ilsvrc12_encoded_train_lmdb",
                        help="train data file path for imagenet")
    parser.add_argument("--val_data", type=str,
                        default="/mnt/disk1/zhibin/experiment_data/imagenet/caffe_lmdb/ilsvrc12_encoded_val_lmdb",
                        help="validation data file path for imagenet")
    parser.add_argument("--crop_size", type=int, default=224,
                        help="crop size for the input image")
    parser.add_argument("--shrink", type=float, default=1.0,
                        help="width multiplier for shrinking the network")
    parser.add_argument("--train_batch_size", type=int, default=32,
                        help="the mini-batch size for training")
    parser.add_argument("--val_batch_size", type=int, default=32,
                        help="the mini-batch size for validation")
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    args = parse_args()
    generate_mobilenetv1(args)
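# Example invocation (the script filename is assumed; the lmdb paths are
# placeholders to adjust for your setup):
#   python generate_mobilenetv1.py --output_net mobilenetv1_train_val.prototxt \
#       --train_data /path/to/ilsvrc12_train_lmdb \
#       --val_data /path/to/ilsvrc12_val_lmdb \
#       --crop_size 224 --shrink 1.0
# A quick sanity check on the output (sketch):
#   net = caffe_pb2.NetParameter()
#   with open("mobilenetv1_train_val.prototxt") as f:
#       text_format.Merge(f.read(), net)
#   print(len(net.layer))  # 115 with the defaults: 27 conv blocks * 4 layers
#                          # + 2 data + pool + fc + loss + 2 accuracy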