【更新完这个之后,不再更新yolo的东西,应该也不会再使用yolo框架】
一、添加LeakyLayer层
leaky_layer.hpp
/**
 * @brief Leaky rectifier: passes positive inputs through unchanged and
 *        scales all other inputs by a fixed slope of 0.1.
 *
 * The layer takes exactly one bottom blob and produces exactly one top blob.
 * In-place computation is rejected in LayerSetUp because the backward pass
 * needs the original bottom data to pick the slope.
 */
template <typename Dtype>
class LeakyLayer : public NeuronLayer<Dtype> {
 public:
  explicit LeakyLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Leaky"; }
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  /**
   * @brief CPU gradient: bottom_diff = top_diff where bottom_data > 0,
   *        otherwise 0.1 * top_diff (same rule as the GPU kernel).
   *
   * This used to be an empty body, which left bottom_diff untouched and
   * therefore broke training in CPU mode without any error.
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    if (!propagate_down[0]) {
      return;
    }
    const int count = bottom[0]->count();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    for (int i = 0; i < count; ++i) {
      bottom_diff[i] = bottom_data[i] > 0 ? top_diff[i] : top_diff[i] * 0.1;
    }
  }

  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom);
};
leaky_layer.cpp
template <typename Dtype>void LeakyLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { NeuronLayer<Dtype>::LayerSetUp(bottom, top); CHECK_NE(top[0], bottom[0]) << this->type() << " Layer does not " "allow in-place computation.";}template <typename Dtype>void LeakyLayer<Dtype>::Forward_cpu( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const int count = top[0]->count(); Dtype* top_data = top[0]->mutable_cpu_data(); const Dtype* bottom_data = bottom[0]->cpu_data(); for(int i = 0; i < count; ++i){ if(bottom_data[i] > 0) top_data[i] = bottom_data[i]; else top_data[i] = 0.1*bottom_data[i]; //top_data[i] = bottom_data[i] > 0£? bottom_data[i]: (Dtype(0.1)*bottom_data[i]); }}#ifdef CPU_ONLYSTUB_GPU(LeakyLayer);#endifINSTANTIATE_CLASS(LeakyLayer);REGISTER_LAYER_CLASS(Leaky);
leaky_layer.cu
// Element-wise forward kernel: out = in when in > 0, otherwise 0.1 * in.
template <typename Dtype>
__global__ void LeakyForward(const int n, const Dtype* in, Dtype* out) {
  CUDA_KERNEL_LOOP(index, n) {
    out[index] = in[index] > 0 ? in[index] : in[index]*0.1;
  }
}

// GPU forward pass: launches LeakyForward over every element of bottom[0].
template <typename Dtype>
void LeakyLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = top[0]->mutable_gpu_data();
  // NOLINT_NEXT_LINE(whitespace/operators)
  LeakyForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, top_data);
  CUDA_POST_KERNEL_CHECK;
}

// Element-wise backward kernel: the incoming gradient is passed through
// where the forward input was positive and scaled by 0.1 elsewhere.
template <typename Dtype>
__global__ void LeakyBackward(const int n, const Dtype* bottom_data,
    Dtype* bottom_diff, const Dtype* top_diff) {
  CUDA_KERNEL_LOOP(index, n) {
    bottom_diff[index] =
        bottom_data[index] > 0 ? top_diff[index] : top_diff[index]*0.1;
  }
}

// GPU backward pass. Does nothing when the bottom blob needs no gradient.
template <typename Dtype>
void LeakyLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  // top_diff is only read here, so use the const accessor rather than
  // mutable_gpu_diff().
  const Dtype* top_diff = top[0]->gpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  // NOLINT_NEXT_LINE(whitespace/operators)
  LeakyBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, bottom_diff, top_diff);
  // Surface launch errors here, as the forward pass already does.
  CUDA_POST_KERNEL_CHECK;
}

INSTANTIATE_LAYER_GPU_FUNCS(LeakyLayer);
二、添加detect层(loss)
detect_layer.hpp
/**
 * @brief Detection loss layer operating on a side x side grid of cells.
 *
 * bottom[0] holds the network prediction and bottom[1] the ground truth;
 * top[0] receives a single scalar cost. The configuration members below are
 * filled from the layer's DetectParameter in the constructor.
 */
template <typename Dtype>
class DetectLayer : public Layer<Dtype> {
 public:
  explicit DetectLayer(const LayerParameter& param);
  virtual ~DetectLayer() {}

  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "Detect"; }
  virtual inline int ExactNumBottomBlobs() const { return 2; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Parameter names follow the (top, propagate_down, bottom) order used by
  // LeakyLayer::Backward_cpu; they were previously written with top and
  // bottom swapped, which was misleading but had the same types.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom);

  int classes;           // class scores per grid cell
  int coords;            // values per predicted box
  int rescore;           // read from the config; the code that used it is
                         // commented out in Forward_cpu
  int side;              // grid is side x side cells
  int num;               // predicted boxes per grid cell
  bool softmax;          // read from the config; not used by the code shown
  bool sqrt;             // when true, predicted w/h are treated as square
                         // roots of the box size
  float jiter;           // holds detect_param.jitter(); not used by the
                         // code shown
  float object_scale;    // weight of the confidence term for the matched box
  float noobject_scale;  // weight of the confidence term for all other boxes
  float class_scale;     // weight of the class-score term
  float coord_scale;     // weight of the box-coordinate term
};
detect_layer.cpp
template<typename Dtype>Dtype lap(Dtype x1_min,Dtype x1_max,Dtype x2_min,Dtype x2_max){ if(x1_min < x2_min){ if(x1_max < x2_min){ return 0; }else{ if(x1_max > x2_min){ if(x1_max < x2_max){ return x1_max - x2_min; }else{ return x2_max - x2_min; } }else{ return 0; } } }else{ if(x1_min < x2_max){ if(x1_max < x2_max) return x1_max-x1_min; else return x2_max-x1_min; }else{ return 0; } }}template<typename Dtype>Dtype box_iou(const vector<Dtype> box1, const vector<Dtype> box2){ Dtype lap_x = lap(box1[0]-box1[2]/2,box1[0]+box1[2]/2,box2[0]-box2[2]/2,box2[0]+box2[2]/2); Dtype lap_y = lap(box1[1]-box1[3]/2,box1[1]+box1[3]/2,box2[1]-box2[3]/2,box2[1]+box2[3]/2); Dtype are = box1[2]*box1[3]+box2[2]*box2[3]-lap_x*lap_y; if(are < 0.00001) return 0.0; else return (lap_x*lap_y)/are;}template <typename Dtype>DetectLayer<Dtype>::DetectLayer(const LayerParameter& param) : Layer<Dtype>(param){ this->layer_param_.add_propagate_down(true); this->layer_param_.add_propagate_down(false); const DetectParameter& detect_param = this->layer_param_.detect_param(); classes = detect_param.classes(); coords = detect_param.coords(); rescore = detect_param.rescore(); side = detect_param.side(); num = detect_param.num(); softmax = detect_param.softmax(); sqrt = detect_param.sqrt(); jiter = detect_param.jitter(); object_scale = detect_param.object_scale(); noobject_scale = detect_param.noobject_scale(); class_scale = detect_param.class_scale(); coord_scale = detect_param.coord_scale();}template <typename Dtype>void DetectLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top){ Layer<Dtype>::LayerSetUp(bottom, top); this->layer_param_.add_loss_weight(Dtype(1)); int inputs = (side*side*(((1+coords)*num)+classes)); CHECK_EQ(inputs, bottom[0]->count(1)) << "input dimensions error"; CHECK_EQ(top.size(), 1) << "top size must be 1";}template <typename Dtype>void DetectLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top){ vector<int> 
shape(0); top[0]->Reshape(shape);}template <typename Dtype>void DetectLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top){ int input_num_each = side*side*(((1+coords)*num)+classes); int batch = bottom[0]->num(); int locations = side*side; const Dtype* truth = bottom[1]->cpu_data(); const Dtype* input = bottom[0]->cpu_data(); Dtype* delta = bottom[0]->mutable_cpu_diff(); Dtype& cost = top[0]->mutable_cpu_data()[0]; cost = Dtype(0.0); for(int i = 0; i < bottom[0]->count(); ++i){ delta[i] = Dtype(0.0); } float avg_iou = 0; float avg_cat = 0; float avg_allcat = 0; float avg_obj = 0; float avg_anyobj = 0; int count = 0; for(int b = 0; b < batch; ++b){ int input_index = b*input_num_each; for(int l = 0; l < locations; ++l){ int truth_index = (b*locations+l)*(1+coords+classes); Dtype is_obj = truth[truth_index]; for(int n = 0; n < num;++n){ int delta_index = input_index + locations*classes + l*num + n; delta[delta_index] = noobject_scale*(0 - input[delta_index]); cost += noobject_scale*pow(input[delta_index],2); avg_anyobj += input[delta_index]; } int best_index = 0; float best_iou = 0; float best_rmse = 400; if(is_obj < 0.0001) continue; int class_index = input_index + l*classes; for(int j = 0; j < classes; ++j){ delta[class_index+j]= class_scale * (truth[truth_index+1+j] - input[class_index+j]); if(truth[truth_index+1+j]) avg_cat += input[class_index+j]; avg_allcat += input[class_index+j]; }//classes vector<float> truth_box; truth_box.push_back(float(truth[truth_index+1+classes]/side)); truth_box.push_back(float(truth[truth_index+1+classes+1]/side)); truth_box.push_back(float(truth[truth_index+1+classes+2])); truth_box.push_back(float(truth[truth_index+1+classes+3])); for(int n = 0; n < num; ++n){ int box_index = input_index + locations*(classes+num)+(l*num+n)*coords; vector<float> out_box; out_box.push_back(float(input[box_index]/side)); out_box.push_back(float(input[box_index+1]/side)); if(sqrt){ 
out_box.push_back(float(input[box_index+2]*input[box_index+2])); out_box.push_back(float(input[box_index+3]*input[box_index+3])); }else{ out_box.push_back(float(input[box_index+2])); out_box.push_back(float(input[box_index+3])); } float iou = box_iou(truth_box, out_box); float rmse = (pow(truth_box[0]-out_box[0],2)+pow(truth_box[1]-out_box[1],2)+pow(truth_box[2]-out_box[2],2)+pow(truth_box[3]-out_box[3],2)); if(best_iou > 0 || iou > 0){ if(iou > best_iou){ best_iou = iou; best_index = n; } }else{ if(rmse < best_rmse){ best_rmse = rmse; best_index = n; } } }//for num int box_index = input_index + locations*(classes+num)+(l*num+best_index)*coords; int tbox_index = truth_index+1+classes; vector<float> best_box; best_box.push_back(float(input[box_index]/side)); best_box.push_back(float(input[box_index+1]/side)); if(sqrt){ best_box.push_back(float(input[box_index+2]*input[box_index+2])); best_box.push_back(float(input[box_index+3]*input[box_index+3])); }else{ best_box.push_back(float(input[box_index+2])); best_box.push_back(float(input[box_index+3])); } int p_index = input_index + locations*classes + l*num + best_index; cost -= noobject_scale*pow(input[p_index],2); cost += object_scale*pow(1-input[p_index],2); avg_obj += input[p_index]; delta[p_index] = object_scale*(1. 
- input[p_index]); if(rescore){ //delta[p_index] = object_scale*(best_iou - input[p_index]); } delta[box_index] = coord_scale*(truth[tbox_index]-input[box_index]); delta[box_index+1] = coord_scale*(truth[tbox_index+1]-input[box_index+1]); delta[box_index+2] = coord_scale*(truth[tbox_index+2]-input[box_index+2]); delta[box_index+3] = coord_scale*(truth[tbox_index+3]-input[box_index+3]); if(sqrt) { delta[box_index+2] = coord_scale*(std::sqrt(truth[tbox_index+2])-input[box_index+2]); delta[box_index+3] = coord_scale*(std::sqrt(truth[tbox_index+3])-input[box_index+3]); } cost += pow(1-best_iou, 2); avg_iou += best_iou; ++count; }//locations }//batch for(int i = 0; i < bottom[0]->count(); ++i){ delta[i] = -delta[i]; } //printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou, avg_cat, avg_allcat, avg_obj, avg_anyobj, count);}template <typename Dtype>void DetectLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<bool>& propagate_down,const vector<Blob<Dtype>*>& top){}template float lap(float x1_min,float x1_max,float x2_min,float x2_max);template double lap(double x1_min,double x1_max,double x2_min,double x2_max);template float box_iou(const vector<float> box1, const vector<float> box2);template double box_iou(const vector<double> box1, const vector<double> box2);#ifdef CPU_ONLYSTUB_GPU(DetectLayer);#endifINSTANTIATE_CLASS(DetectLayer);REGISTER_LAYER_CLASS(Detect);
三、train的prototxt
yolo-train.prototxt
# Training network definition, one layer per line.
# Layers of type "Leaky" and "Detect" are the custom layers defined earlier
# in this document.
name: "yolo"

# ---- Inputs: images from LMDB, ground truth from HDF5 --------------------
# NOTE(review): every `source` is an empty string and must be filled in.
layer { name: "data" type: "Data" top: "data" include { phase: TRAIN } data_param { source: "" batch_size: 15 backend: LMDB } transform_param { mean_value:127.5 mean_value:127.5 mean_value:127.5 scale: 0.00784 }}
layer { name: "data" type: "Data" top: "data" include { phase: TEST } data_param { source: "" batch_size: 15 backend: LMDB } transform_param { mean_value:127.5 mean_value:127.5 mean_value:127.5 scale: 0.00784 }}
layer { name: "truth" type: "HDF5Data" top: "label" include { phase: TRAIN } hdf5_data_param { source: "" batch_size: 15 }}
layer { name: "truth" type: "HDF5Data" top: "label" include { phase: TEST } hdf5_data_param { source: "" batch_size: 15 }}

# ---- Convolutional trunk: each convolution is followed by a Leaky layer --
# NOTE(review): no convolution sets a weight_filler — presumably the weights
# are loaded from an existing model; confirm before training from scratch.
layer{ name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 64 kernel_size: 7 stride: 2 pad: 3 }}
layer{ name: "Leaky1" type: "Leaky" bottom: "conv1" top: "Leaky1"}
layer{ name: "pool1" type: "Pooling" bottom: "Leaky1" top: "pool1" pooling_param { pool: MAX kernel_size: 2 stride: 2 }}
layer{ name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 192 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky2" type: "Leaky" bottom: "conv2" top: "Leaky2"}
layer{ name: "pool2" type: "Pooling" bottom: "Leaky2" top: "pool2" pooling_param { pool: MAX kernel_size: 2 stride: 2 }}
layer{ name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 128 kernel_size: 1 stride: 1 pad: 0 }}
layer{ name: "Leaky3" type: "Leaky" bottom: "conv3" top: "Leaky3"}
layer{ name: "conv4" type: "Convolution" bottom: "Leaky3" top: "conv4" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 256 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky4" type: "Leaky" bottom: "conv4" top: "Leaky4"}
layer{ name: "conv5" type: "Convolution" bottom: "Leaky4" top: "conv5" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 256 kernel_size: 1 stride: 1 pad: 0 }}
layer{ name: "Leaky5" type: "Leaky" bottom: "conv5" top: "Leaky5"}
layer{ name: "conv6" type: "Convolution" bottom: "Leaky5" top: "conv6" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 512 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky6" type: "Leaky" bottom: "conv6" top: "Leaky6"}
layer{ name: "pool3" type: "Pooling" bottom: "Leaky6" top: "pool3" pooling_param { pool: MAX kernel_size: 2 stride: 2 }}
layer{ name: "conv7" type: "Convolution" bottom: "pool3" top: "conv7" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 256 kernel_size: 1 stride: 1 pad: 0 }}
layer{ name: "Leaky7" type: "Leaky" bottom: "conv7" top: "Leaky7"}
layer{ name: "conv8" type: "Convolution" bottom: "Leaky7" top: "conv8" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 512 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky8" type: "Leaky" bottom: "conv8" top: "Leaky8"}
layer{ name: "conv9" type: "Convolution" bottom: "Leaky8" top: "conv9" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 256 kernel_size: 1 stride: 1 pad: 0 }}
layer{ name: "Leaky9" type: "Leaky" bottom: "conv9" top: "Leaky9"}
# NOTE(review): conv10 has no param { lr_mult } blocks, unlike its
# neighbours — confirm this is intentional.
layer{ name: "conv10" type: "Convolution" bottom: "Leaky9" top: "conv10" convolution_param{ num_output: 512 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky10" type: "Leaky" bottom: "conv10" top: "Leaky10"}
layer{ name: "conv11" type: "Convolution" bottom: "Leaky10" top: "conv11" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 256 kernel_size: 1 stride: 1 pad: 0 }}
layer{ name: "Leaky11" type: "Leaky" bottom: "conv11" top: "Leaky11"}
# NOTE(review): conv12 has no param { lr_mult } blocks either — confirm.
layer{ name: "conv12" type: "Convolution" bottom: "Leaky11" top: "conv12" convolution_param{ num_output:512 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky12" type: "Leaky" bottom: "conv12" top: "Leaky12"}
layer{ name: "conv13" type: "Convolution" bottom: "Leaky12" top: "conv13" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 256 kernel_size: 1 stride: 1 pad: 0 }}
layer{ name: "Leaky13" type: "Leaky" bottom: "conv13" top: "Leaky13"}
layer{ name: "conv14" type: "Convolution" bottom: "Leaky13" top: "conv14" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 512 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky14" type: "Leaky" bottom: "conv14" top: "Leaky14"}
layer{ name: "conv15" type: "Convolution" bottom: "Leaky14" top: "conv15" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 512 kernel_size: 1 stride: 1 pad: 0 }}
layer{ name: "Leaky15" type: "Leaky" bottom: "conv15" top: "Leaky15"}
layer{ name: "conv16" type: "Convolution" bottom: "Leaky15" top: "conv16" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 1024 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky16" type: "Leaky" bottom: "conv16" top: "Leaky16"}
layer{ name: "pool4" type: "Pooling" bottom: "Leaky16" top: "pool4" pooling_param { pool: MAX kernel_size: 2 stride: 2 }}
layer{ name: "conv17" type: "Convolution" bottom: "pool4" top: "conv17" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 512 kernel_size: 1 stride: 1 pad: 0 }}
layer{ name: "Leaky17" type: "Leaky" bottom: "conv17" top: "Leaky17"}
layer{ name: "conv18" type: "Convolution" bottom: "Leaky17" top: "conv18" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 1024 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky18" type: "Leaky" bottom: "conv18" top: "Leaky18"}
layer{ name: "conv19" type: "Convolution" bottom: "Leaky18" top: "conv19" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 512 kernel_size: 1 stride: 1 pad: 0 }}
layer{ name: "Leaky19" type: "Leaky" bottom: "conv19" top: "Leaky19"}
layer{ name: "conv20" type: "Convolution" bottom: "Leaky19" top: "conv20" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 1024 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky20" type: "Leaky" bottom: "conv20" top: "Leaky20"}
layer{ name: "conv21" type: "Convolution" bottom: "Leaky20" top: "conv21" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 1024 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky21" type: "Leaky" bottom: "conv21" top: "Leaky21"}
# conv22 is the only 3x3 convolution in this file with stride 2.
layer{ name: "conv22" type: "Convolution" bottom: "Leaky21" top: "conv22" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 1024 kernel_size: 3 stride: 2 pad: 1 }}
layer{ name: "Leaky22" type: "Leaky" bottom: "conv22" top: "Leaky22"}
layer{ name: "conv23" type: "Convolution" bottom: "Leaky22" top: "conv23" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 1024 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky23" type: "Leaky" bottom: "conv23" top: "Leaky23"}
layer{ name: "conv24" type: "Convolution" bottom: "Leaky23" top: "conv24" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param{ num_output: 1024 kernel_size: 3 stride: 1 pad: 1 }}
layer{ name: "Leaky24" type: "Leaky" bottom: "conv24" top: "Leaky24"}

# ---- Fully connected head (lr_mult 10 / 20) -------------------------------
layer { name: "connect1" type: "InnerProduct" bottom: "Leaky24" top: "connect1" param { lr_mult: 10 } param { lr_mult: 20 } inner_product_param { num_output: 4096 }}
layer{ name: "Leaky25" type: "Leaky" bottom: "connect1" top: "Leaky25"}
# 637 outputs feed the Detect layer, which checks this count against
# side*side*((1+coords)*num+classes).
layer { name: "connect2_add" type: "InnerProduct" bottom: "Leaky25" top: "connect2_add" param { lr_mult: 10 } param { lr_mult: 20 } inner_product_param { num_output: 637 weight_filler { type: "uniform" max: 1 min: -1 } bias_filler { type: "constant" } }}

# ---- Detection loss --------------------------------------------------------
# Only `classes` is set; every other detect_param field uses its default.
layer { name: "detect" type: "Detect" top: "loss" bottom: "connect2_add" bottom: "label" detect_param { classes: 3 }}
联系客服