NCS2 作为一个可用于深度学习的加速棒,在目标跟踪这种非常讲究实时性的应用中是非常有用的。在工作中我使用3559a实现了目标跟踪的算法,目前海思被制裁造成3559a涨到了3000元一片的价格而且还缺货,所以我将以前的那套跟踪算法移植到了NCS2上。 目前比较可用的目标跟踪算法主要有三类: 1.基于相关滤波的目标跟踪算法 2.基于深度学习的目标跟踪算法 3.基于检测加位置相关滤波的算法
目前基于相关滤波算法的目标跟踪算法效果已经很难满足我的要求了,主要体现在无法长时稳定跟踪,另外对小目标及纹理效果低的目标效果非常的不理想,所以这个方案不考虑。 基于深度学习的目标跟踪算法比如siamRPN,最大的问题是计算量太大,而且目标消失后无法准确知道目标消失了,所以也是无法作为长时间目标跟踪的。我的项目需要的是长时间跟踪一个人,我选择的是基于人头检测加上基于人头位置前后帧相关性的跟踪算法逻辑,之前人头检测使用NNIE进行加速,CPU根据NNIE返回的人头位置与前帧的位置及运动方向进行融合,判断当前获取的人头位置是否为需要跟踪的那个人的。 1.整个跟踪算法的第一步:选定合适的检测模型并收集合适的开源人头数据集: 我使用的是SSD模型,这个模型效果还可以并且最重要的是比较方便部署在设备端。另外一个合适的人头数据集也非常的重要,我最后测试了多种人头检测的数据集,发现hollywoodHead这个人头数据集经过各种调参后训练得到的检测效果是比较好的。 2.训练电脑环境安装: 我训练模型所使用的电脑配置为:I7 8系列的CPU、32G内存、RTX2070显卡一张 3.使用SSD模型训练自己的人头检测模型: 用SSD实现各种物品、人脸或者人体检测的很多,但是为了简化SSD的计算量我需要训练一个只检测人头的单分类模型,这个可以比那些基于COCO或者其他现成的检测模型的计算量小很多。相比原版的SSD训练代码改了一些地方,上训练代码:
- # coding=utf-8
- from __future__ import print_function
- import caffe
- from caffe.model_libs import *
- from google.protobuf import text_format
- import math
- import os
- import shutil
- import stat
- import subprocess
- import sys
def AddExtraLayers(net, use_batchnorm=True, lr_mult=1):
    """Append the extra feature layers conv6_1 .. conv9_2 to ``net``.

    The layers are chained: the first one is attached to the layer that is
    currently last in ``net``, and every following layer is attached to the
    one created just before it. All layers are built with ``ConvBNLayer``
    (from ``caffe.model_libs``) with ReLU enabled.

    Args:
        net: the ``caffe.NetSpec`` being built; modified in place.
        use_batchnorm: forwarded to ``ConvBNLayer`` for every layer.
        lr_mult: learning-rate multiplier forwarded to ``ConvBNLayer``.

    Returns:
        The same ``net`` object, for convenience.
    """
    use_relu = True

    # One row per layer, in the positional order ConvBNLayer receives them
    # after use_relu: (name, 6th arg, 7th arg, 8th arg, 9th arg).
    # NOTE(review): upstream model_libs names these num_output, kernel_size,
    # pad, stride -- confirm against the installed caffe version.
    extra_layers = [
        ("conv6_1", 256, 1, 0, 1),
        ("conv6_2", 512, 3, 1, 2),
        ("conv7_1", 128, 1, 0, 1),
        ("conv7_2", 256, 3, 1, 2),
        ("conv8_1", 128, 1, 0, 1),
        ("conv8_2", 256, 3, 0, 1),
        ("conv9_1", 128, 1, 0, 1),
        ("conv9_2", 256, 3, 0, 1),
    ]

    # list(...) keeps this working on Python 3, where keys() returns a view
    # that cannot be indexed; on Python 2 the result is unchanged.
    from_layer = list(net.keys())[-1]
    for out_layer, num_output, kernel_size, pad, stride in extra_layers:
        ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu,
                    num_output, kernel_size, pad, stride, lr_mult=lr_mult)
        from_layer = out_layer

    return net
# Directory the generated job script changes into before starting caffe.
caffe_root = os.getcwd()

# Run the generated job script as soon as it has been written.
run_soon = True
# Continue from the newest *.solverstate in the snapshot directory, if any.
resume_training = True
# Delete snapshots older than the newest one before training.
remove_old_models = False

# LMDB databases holding the training / test annotations.
train_data = "examples/hollywoodhead/hollywoodhead_trainval_lmdb"
test_data = "examples/hollywoodhead/hollywoodhead_test_lmdb"

# Network input size.
resize_width = 300
resize_height = 300
resize = "{}x{}".format(resize_width, resize_height)


def _jaccard_sampler(constraint):
    """Build one batch-sampler entry that differs only in its constraint."""
    return {
        'sampler': {
            'min_scale': 0.3,
            'max_scale': 1.0,
            'min_aspect_ratio': 0.5,
            'max_aspect_ratio': 2.0,
        },
        'sample_constraint': constraint,
        'max_trials': 50,
        'max_sample': 1,
    }


# First entry: unconstrained sampler with a single trial.
batch_sampler = [
    {
        'sampler': {},
        'max_trials': 1,
        'max_sample': 1,
    },
]
# Then one sampler per minimum-overlap threshold ...
batch_sampler.extend(
    _jaccard_sampler({'min_jaccard_overlap': overlap})
    for overlap in (0.1, 0.3, 0.5, 0.7, 0.9)
)
# ... and a final one bounded by a maximum overlap instead.
batch_sampler.append(_jaccard_sampler({'max_jaccard_overlap': 1.0}))
def _warp_resize(interp_modes):
    """Return a resize_param that warps to the network input size."""
    return {
        'prob': 1,
        'resize_mode': P.Resize.WARP,
        'height': resize_height,
        'width': resize_width,
        'interp_mode': interp_modes,
    }


# Photometric distortion settings used for training only.
_distort_param = {
    'brightness_prob': 0.5,
    'brightness_delta': 32,
    'contrast_prob': 0.5,
    'contrast_lower': 0.5,
    'contrast_upper': 1.5,
    'hue_prob': 0.5,
    'hue_delta': 18,
    'saturation_prob': 0.5,
    'saturation_lower': 0.5,
    'saturation_upper': 1.5,
    'random_order_prob': 0.0,
}

# Training transform: mirroring, mean subtraction, resize with several
# interpolation modes, distortion, expansion and an emit constraint.
train_transform_param = {
    'mirror': True,
    'mean_value': [104, 117, 123],
    'resize_param': _warp_resize([
        P.Resize.LINEAR,
        P.Resize.AREA,
        P.Resize.NEAREST,
        P.Resize.CUBIC,
        P.Resize.LANCZOS4,
    ]),
    'distort_param': _distort_param,
    'expand_param': {
        'prob': 0.5,
        'max_expand_ratio': 4.0,
    },
    'emit_constraint': {
        'emit_type': caffe_pb2.EmitConstraint.CENTER,
    },
}

# Test transform: mean subtraction and a linear-interpolation resize only.
test_transform_param = {
    'mean_value': [104, 117, 123],
    'resize_param': _warp_resize([P.Resize.LINEAR]),
}
# Batch normalisation is off; base_lr is chosen according to that flag.
use_batchnorm = False
lr_mult = 1
base_lr = 0.0004 if use_batchnorm else 0.00004

# Names derived from the input size.
job_name = "TK_{}".format(resize)
model_name = "hollywoodhead_{}".format(job_name)

# Output locations (models and snapshots share one directory).
save_dir = "models/VGGNet/hollywoodhead/{}".format(job_name)
snapshot_dir = "models/VGGNet/hollywoodhead/{}".format(job_name)
job_dir = "jobs/VGGNet/hollywoodhead/{}".format(job_name)
output_result_dir = "{}/data/VOCdevkit/results/hollywoodhead/{}/Main".format(
    os.environ['HOME'], job_name)

# Generated network / solver definitions.
train_net_file = "{}/train.prototxt".format(save_dir)
test_net_file = "{}/test.prototxt".format(save_dir)
deploy_net_file = "{}/deploy.prototxt".format(save_dir)
solver_file = "{}/solver.prototxt".format(save_dir)
snapshot_prefix = "{}/{}".format(snapshot_dir, model_name)
job_file = "{}/{}.sh".format(job_dir, model_name)

# Dataset metadata and the pretrained weights used when not resuming.
name_size_file = "data/hollywoodhead/test_name_size.txt"
pretrain_model = "models/VGGNet/VGG_ILSVRC_16_layers_fc_reduced.caffemodel"
label_map_file = "data/hollywoodhead/labelmap_voc.prototxt"

# Two classes: label 0 is the background, leaving one object class.
num_classes = 2
share_location = True
background_label_id = 0
train_on_diff_gt = True
normalization_mode = P.Loss.VALID
code_type = P.PriorBox.CENTER_SIZE
ignore_cross_boundary_bbox = False
mining_type = P.MultiBoxLoss.MAX_NEGATIVE
neg_pos_ratio = 3.
loc_weight = (neg_pos_ratio + 1.) / 4.

multibox_loss_param = dict(
    loc_loss_type=P.MultiBoxLoss.SMOOTH_L1,
    conf_loss_type=P.MultiBoxLoss.SOFTMAX,
    loc_weight=loc_weight,
    num_classes=num_classes,
    share_location=share_location,
    match_type=P.MultiBoxLoss.PER_PREDICTION,
    overlap_threshold=0.5,
    use_prior_for_matching=True,
    background_label_id=background_label_id,
    use_difficult_gt=train_on_diff_gt,
    mining_type=mining_type,
    neg_pos_ratio=neg_pos_ratio,
    neg_overlap=0.5,
    code_type=code_type,
    ignore_cross_boundary_bbox=ignore_cross_boundary_bbox,
)
loss_param = dict(normalization=normalization_mode)
# Prior-box geometry.
#
# The script resizes its input to 300x300 and its extra layers stop at
# conv9_2, so there are six detection source layers. The previous values
# (min_dim = 512 and a seventh 'conv10_2' source layer) referred to a layer
# that is never created and produced seven min/max sizes against six
# steps / aspect_ratios / normalizations.
min_dim = 300
mbox_source_layers = ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']

# Box scales, in percent of min_dim, spread evenly over all source layers
# except the first, which gets a dedicated smaller scale below.
min_ratio = 20
max_ratio = 90
# Floor division gives the same integer step on Python 2 and Python 3.
step = (max_ratio - min_ratio) // (len(mbox_source_layers) - 2)

_ratios = list(range(min_ratio, max_ratio + 1, step))
min_sizes = [min_dim * ratio / 100. for ratio in _ratios]
max_sizes = [min_dim * (ratio + step) / 100. for ratio in _ratios]
# Sizes for the first source layer (10% .. 20% of min_dim).
min_sizes = [min_dim * 10 / 100.] + min_sizes
max_sizes = [min_dim * 20 / 100.] + max_sizes

# Per-source-layer settings; each list needs one entry per source layer.
steps = [8, 16, 32, 64, 100, 300]
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
normalizations = [20, -1, -1, -1, -1, -1]

# Catch a layer/parameter count mismatch here, before any prototxt is
# generated from inconsistent settings.
for _label, _values in (("min_sizes", min_sizes), ("max_sizes", max_sizes),
                        ("steps", steps), ("aspect_ratios", aspect_ratios),
                        ("normalizations", normalizations)):
    if len(_values) != len(mbox_source_layers):
        raise ValueError("{} has {} entries but there are {} source layers".format(
            _label, len(_values), len(mbox_source_layers)))
# Prior-box variance depends on how boxes are encoded.
if code_type == P.PriorBox.CENTER_SIZE:
    prior_variance = [0.1, 0.1, 0.2, 0.2]
else:
    prior_variance = [0.1]
flip = True
clip = False

# Comma-separated GPU ids. Empty entries are dropped so that gpus = ""
# yields num_gpus == 0 and falls through to the CPU defaults below instead
# of failing on int('').
gpus = "0"
gpulist = [gpu_id for gpu_id in gpus.split(",") if gpu_id]
num_gpus = len(gpulist)

# batch_size is split across GPUs; iter_size accumulates gradients until
# accum_batch_size samples have been processed.
batch_size = 16
accum_batch_size = 16
# Floor division keeps iter_size an integer on Python 3 as well.
iter_size = accum_batch_size // batch_size
solver_mode = P.Solver.CPU
device_id = 0
batch_size_per_device = batch_size
if num_gpus > 0:
    batch_size_per_device = int(math.ceil(float(batch_size) / num_gpus))
    iter_size = int(math.ceil(float(accum_batch_size) / (batch_size_per_device * num_gpus)))
    solver_mode = P.Solver.GPU
    device_id = int(gpulist[0])

# Rescale the learning rate to match the loss normalisation mode.
if normalization_mode == P.Loss.NONE:
    base_lr /= batch_size_per_device
elif normalization_mode == P.Loss.VALID:
    base_lr *= 25. / loc_weight
elif normalization_mode == P.Loss.FULL:
    base_lr *= 2000.

# Number of test iterations needed to cover num_test_image images.
# Floor division keeps test_iter an integer on Python 3 (it is written
# into the solver definition as test_iter).
num_test_image = 500
test_batch_size = 4
test_iter = num_test_image // test_batch_size
# Solver settings; expanded into caffe_pb2.SolverParameter later on.
solver_param = dict(
    # Optimisation schedule.
    base_lr=base_lr,
    weight_decay=0.0005,
    lr_policy="multistep",
    stepvalue=[80000, 100000, 120000],
    gamma=0.1,
    momentum=0.9,
    iter_size=iter_size,
    max_iter=120000,
    snapshot=40000,
    display=10,
    average_loss=10,
    type="SGD",
    solver_mode=solver_mode,
    device_id=device_id,
    debug_info=False,
    snapshot_after_train=True,
    # Periodic evaluation on the test net.
    test_iter=[test_iter],
    test_interval=10000,
    eval_type="detection",
    ap_version="11point",
    test_initialization=False,
)

# Parameters of the DetectionOutput layer (NMS and result dumping).
det_out_param = dict(
    num_classes=num_classes,
    share_location=share_location,
    background_label_id=background_label_id,
    nms_param=dict(nms_threshold=0.45, top_k=400),
    save_output_param=dict(
        output_directory=output_result_dir,
        output_name_prefix="comp4_det_test_",
        output_format="VOC",
        label_map_file=label_map_file,
        name_size_file=name_size_file,
        num_test_image=num_test_image,
    ),
    keep_top_k=200,
    confidence_threshold=0.01,
    code_type=code_type,
)

# Parameters of the DetectionEvaluate layer.
det_eval_param = dict(
    num_classes=num_classes,
    background_label_id=background_label_id,
    overlap_threshold=0.5,
    evaluate_difficult_gt=False,
    name_size_file=name_size_file,
)
# Check the required inputs and create the output directories.
for _required_path in (train_data, test_data, label_map_file, pretrain_model):
    check_if_exist(_required_path)
for _output_dir in (save_dir, job_dir, snapshot_dir):
    make_if_not_exist(_output_dir)

# ---- Training network ------------------------------------------------------
net = caffe.NetSpec()
net.data, net.label = CreateAnnotatedDataLayer(
    train_data,
    batch_size=batch_size_per_device,
    train=True,
    output_label=True,
    label_map_file=label_map_file,
    transform_param=train_transform_param,
    batch_sampler=batch_sampler)

# Backbone followed by the extra feature layers.
VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True,
           dilated=True, dropout=False)
AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)

# Detection head on top of the selected source layers.
_train_head_options = dict(
    data_layer='data',
    from_layers=mbox_source_layers,
    use_batchnorm=use_batchnorm,
    min_sizes=min_sizes,
    max_sizes=max_sizes,
    aspect_ratios=aspect_ratios,
    steps=steps,
    normalizations=normalizations,
    num_classes=num_classes,
    share_location=share_location,
    flip=flip,
    clip=clip,
    prior_variance=prior_variance,
    kernel_size=3,
    pad=1,
    lr_mult=lr_mult)
mbox_layers = CreateMultiBoxHead(net, **_train_head_options)

# The loss layer takes the head outputs plus the ground-truth labels.
name = "mbox_loss"
mbox_layers.append(net.label)
net[name] = L.MultiBoxLoss(
    *mbox_layers,
    multibox_loss_param=multibox_loss_param,
    loss_param=loss_param,
    include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
    propagate_down=[True, True, False, False])

# Write the definition and keep a copy next to the job files.
with open(train_net_file, 'w') as f:
    print('name: "{}_train"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(train_net_file, job_dir)
# ---- Test network ----------------------------------------------------------
net = caffe.NetSpec()
net.data, net.label = CreateAnnotatedDataLayer(
    test_data,
    batch_size=test_batch_size,
    train=False,
    output_label=True,
    label_map_file=label_map_file,
    transform_param=test_transform_param)

VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True,
           dilated=True, dropout=False)
AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)

_test_head_options = dict(
    data_layer='data',
    from_layers=mbox_source_layers,
    use_batchnorm=use_batchnorm,
    min_sizes=min_sizes,
    max_sizes=max_sizes,
    aspect_ratios=aspect_ratios,
    steps=steps,
    normalizations=normalizations,
    num_classes=num_classes,
    share_location=share_location,
    flip=flip,
    clip=clip,
    prior_variance=prior_variance,
    kernel_size=3,
    pad=1,
    lr_mult=lr_mult)
mbox_layers = CreateMultiBoxHead(net, **_test_head_options)

# Replace the raw confidence blob (index 1 of the head outputs) with the
# activation that matches the confidence loss used for training.
conf_name = "mbox_conf"
_conf_loss_type = multibox_loss_param["conf_loss_type"]
if _conf_loss_type == P.MultiBoxLoss.SOFTMAX:
    # Reshape -> softmax over the class axis -> flatten back.
    reshape_name = "{}_reshape".format(conf_name)
    softmax_name = "{}_softmax".format(conf_name)
    flatten_name = "{}_flatten".format(conf_name)
    net[reshape_name] = L.Reshape(net[conf_name], shape=dict(dim=[0, -1, num_classes]))
    net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
    net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
    mbox_layers[1] = net[flatten_name]
elif _conf_loss_type == P.MultiBoxLoss.LOGISTIC:
    sigmoid_name = "{}_sigmoid".format(conf_name)
    net[sigmoid_name] = L.Sigmoid(net[conf_name])
    mbox_layers[1] = net[sigmoid_name]

# Detection output and its evaluation against the labels (TEST phase only).
_test_phase = dict(phase=caffe_pb2.Phase.Value('TEST'))
net.detection_out = L.DetectionOutput(
    *mbox_layers,
    detection_output_param=det_out_param,
    include=_test_phase)
net.detection_eval = L.DetectionEvaluate(
    net.detection_out, net.label,
    detection_evaluate_param=det_eval_param,
    include=_test_phase)

with open(test_net_file, 'w') as f:
    print('name: "{}_test"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(test_net_file, job_dir)
# ---- Deploy network --------------------------------------------------------
# Derived from the test net: the first and the last layer are dropped and an
# explicit 1 x 3 x H x W 'data' input is declared instead.
deploy_net = net
with open(deploy_net_file, 'w') as f:
    net_param = deploy_net.to_proto()
    del net_param.layer[0]
    del net_param.layer[-1]
    net_param.name = '{}_deploy'.format(model_name)
    net_param.input.extend(['data'])
    _input_shape = caffe_pb2.BlobShape(dim=[1, 3, resize_height, resize_width])
    net_param.input_shape.extend([_input_shape])
    print(net_param, file=f)
shutil.copy(deploy_net_file, job_dir)

# ---- Solver definition -----------------------------------------------------
solver = caffe_pb2.SolverParameter(
    train_net=train_net_file,
    test_net=[test_net_file],
    snapshot_prefix=snapshot_prefix,
    **solver_param)
with open(solver_file, 'w') as f:
    print(solver, file=f)
shutil.copy(solver_file, job_dir)
def _snapshot_iteration(filename):
    """Return the iteration number encoded in a snapshot file name.

    Snapshot files are named ``<model_name>_iter_<N>.<ext>``; ``N`` is
    returned as an int.
    """
    stem = os.path.splitext(filename)[0]
    return int(stem.split("{}_iter_".format(model_name))[1])


# Find the most recent solver state in snapshot_dir (0 when there is none).
max_iter = 0
for snapshot_name in os.listdir(snapshot_dir):
    if snapshot_name.endswith(".solverstate"):
        max_iter = max(max_iter, _snapshot_iteration(snapshot_name))

# Command-line fragment selecting where training starts from. It is written
# into the middle of a multi-line shell command, so it must end with a
# backslash continuation followed by a newline.
train_src_param = '--weights="{}" \\\n'.format(pretrain_model)
if resume_training and max_iter > 0:
    train_src_param = '--snapshot="{}_iter_{}.solverstate" \\\n'.format(
        snapshot_prefix, max_iter)

if remove_old_models:
    # Remove every solver state / model older than the newest snapshot.
    for snapshot_name in os.listdir(snapshot_dir):
        if not snapshot_name.endswith((".solverstate", ".caffemodel")):
            continue
        if max_iter > _snapshot_iteration(snapshot_name):
            os.remove("{}/{}".format(snapshot_dir, snapshot_name))
# Write the shell script that launches training. Each f.write() emits one
# physical line; lines ending in " \\\n" are shell line continuations, so
# the solver, the weights/snapshot option and the redirection all belong to
# a single `caffe train` command.
with open(job_file, 'w') as f:
    f.write('cd {}\n'.format(caffe_root))
    f.write('./build/tools/caffe train \\\n')
    f.write('--solver="{}" \\\n'.format(solver_file))
    f.write(train_src_param)
    if solver_param['solver_mode'] == P.Solver.GPU:
        f.write('--gpu {} 2>&1 | tee {}/{}.log\n'.format(gpus, job_dir, model_name))
    else:
        f.write('2>&1 | tee {}/{}.log\n'.format(job_dir, model_name))

# Keep a copy of this script next to the generated job files.
py_file = os.path.abspath(__file__)
shutil.copy(py_file, job_dir)

# Make the job script executable for the owner and optionally run it.
os.chmod(job_file, stat.S_IRWXU)
if run_soon:
    subprocess.call(job_file, shell=True)
总的训练耗时在8小时左右!!!!!!!!!!!!!!!!!!!!!!!! 得到人头检测模型后可以根据SSD官方模型测试代码稍微改一下就可以用于测试自己的模型,这部分我就不贴代码了。。。。。。 4.在NCS2 部署你的SSD 人头检测模型 NCS2 是基于OpenVINO框架,所以想在NCS2部署SSD其实可以直接参考OpenVINO SSD 的例子。 将caffe版的SSD转化为openvino框架下的模型: 进入/opt/intel/openvino/deployment_tools/model_optimizer目录,执行模型优化器脚本 mo_caffe.py(原文此处的具体命令已缺失)
参数如下: --input_proto:prototxt文件所在位置 -k:CustomLayersMapping.xml文件所在位置 --input_model: --mean_file: --mean_file_offsets: --output_dir --scale SCALE --log_level {CRITICAL,ERROR,WARN,WARNING,INFO,DEBUG,NOTSET} --input --mean_values --data_type {FP16,FP32,half,float} --disable_fusing --disable_resnet_optimization --finegrain_fusing --enable_concat_optimization --extensions --silent --freeze_placeholder_with_value --generate_deprecated_IR_V2 --mean_file_offsets --disable_omitting_optional --enable_flattening_nested_params
另外我也罗列了 OpenVINO支持的caffe模型a. Classification models:
- AlexNet
- VGG-16, VGG-19
- SqueezeNet v1.0, SqueezeNet v1.1
- ResNet-50, ResNet-101, Res-Net-152
- Inception v1, Inception v2, Inception v3, Inception v4
- CaffeNet
- MobileNet
- Squeeze-and-Excitation Networks: SE-BN-Inception, SE-Resnet-101, SE-ResNet-152, SE-ResNet-50, SE-ResNeXt-101, SE-ResNeXt-50
- ShuffleNet v2
b. Object detection models:
- SSD300-VGG16, SSD500-VGG16
- Faster-RCNN
- RefineDet (Myriad plugin only)
c. Face detection models:
d. Semantic segmentation models:
OpenVINO支持的caffe层与其在Intermediate Representation (IR)中的对应关系[td]
1 |
Input |
Input |
2 |
GlobalInput |
Input |
3 |
InnerProduct |
FullyConnected |
4 |
Dropout |
Ignored, does not appear in IR |
5 |
Convolution |
Convolution |
6 |
Deconvolution |
Deconvolution |
7 |
Pooling |
Pooling |
8 |
BatchNorm |
BatchNormalization |
9 |
LRN |
Norm |
10 |
Power |
Power |
11 |
ReLU |
ReLU |
12 |
Scale |
ScaleShift |
13 |
Concat |
Concat |
14 |
Eltwise |
Eltwise |
15 |
Flatten |
Flatten |
16 |
Reshape |
Reshape |
17 |
Slice |
Slice |
18 |
Softmax |
SoftMax |
19 |
Permute |
Permute |
20 |
ROIPooling |
ROIPooling |
21 |
Tile |
Tile |
22 |
ShuffleChannel |
Reshape + Split + Permute + Concat |
23 |
Axpy |
ScaleShift + Eltwise |
24 |
BN |
ScaleShift |
25 |
DetectionOutput |
DetectionOutput |
26 |
StridedSlice |
StridedSlice |
27 |
Bias |
Eltwise(operation = sum) |
参考这个链接: https://docs.openvinotoolkit.org/latest/openvino_docs_MO_DG_prepare_model_convert_model_Convert_Model_From_Caffe.html
实际在NCS2运行的代码建议参考ncappzoo 里面SSD的代码,这部分我就不提供了。 在NCS2上运行人头检测得到人头位置后就可以在CPU上根据这些信息跟踪了,核心代码如下:
- switch(status)
- {
- head_detection(bgr, headResult);
- for(int i = 0; i < headResult.size(); i++)
- {
- actual_box = drawRect(pData, headResult[i], Scalar(255, 0, 0), offset_x, offset_y, offset_s, img_w, img_h);
- }
- if(headResult.size() == 1)
- {
- if(idx < filteQueueDetect)
- {
- headFilte[idx] = headResult[0];
- idx++;
- }
- else
- {
- for(i = 0; i < filteQueueDetect - 1; i++)
- {
- headFilte[i] = headFilte[i + 1];
- }
- headFilte[filteQueueDetect - 1] = headResult[0];
- var = variance(headFilte, filteQueueDetect);
- // printf("var = %f
- ", var);
- if(var < 1)
- {
- box_src = headResult[0];
- box_src_last = box_src;
- // status = OBJ_TRACK_INIT;
- status = TICK_HEAD_DETECT;
- idx = 0;
- }
- }
- }
- break;
- // dst = mRGBAImg.clone();
- tracker.init(box_src, dst);
- actual_box = drawRect(pData, box_src, Scalar(0, 255, 255), offset_x, offset_y, offset_s, img_w, img_h);
- status = OBJ_TRACK;
- break;
- case OBJ_TRACK:
- result = tracker.update(dst);
- actual_box = drawRect(pData, result, Scalar(0, 255, 255), offset_x, offset_y, offset_s, img_w, img_h);
- if(count++ == 5)
- {
- status = TICK_HEAD_DETECT;
- count = 0;
- detect_count = 0;
- }
- break;
- head_detection(bgr, headResult);
- IOU = 0;
- IOU_max = 0;
- for(i = 0; i < headResult.size(); i++)
- {
- IOU = iou(box_src_last, headResult[i]);
- if(IOU_max < IOU && IOU>0)
- {
- IOU_max = IOU;
- box_src = headResult[i];
- }
- }
- if(IOU_max > 0.1)
- {
- status = TICK_HEAD_DETECT;
- int mean_x=0,mean_y=0,mean_w=0,mean_h=0;
- if(box_src.area() < box_src_last.area()*1.5 && box_src.area() > box_src_last.area()*0.7)
- {
- if(idx < trackQueueDetect)
- {
- trackFilte[idx] = box_src;
- if(idx>2)
- {
- for(i=0;i
- {
- mean_x += trackFilte[i].x;
- mean_y += trackFilte[i].y;
- mean_w += trackFilte[i].width;
- mean_h += trackFilte[i].height;
- }
- box_src.x = (int)(mean_x/(idx));
- box_src.y = (int)(mean_y/(idx));
- box_src.width = (int)(mean_w/(idx));
- box_src.width = (int)(mean_h/(idx));
- }
- idx++;
- }
- else
- {
- trackFilte[trackQueueDetect - 1] = box_src;
- for(i = 0; i < trackQueueDetect - 1; i++)
- {
- trackFilte[i] = trackFilte[i + 1];
- mean_x += trackFilte[i].x;
- mean_y += trackFilte[i].y;
- mean_w += trackFilte[i].width;
- mean_h += trackFilte[i].height;
- }
- box_src.x = (int)((mean_x/(trackQueueDetect-1)+box_src.x)/2);
- box_src.y = (int)((mean_y/(trackQueueDetect-1)+box_src.y)/2);
- box_src.width = (int)((mean_w/(trackQueueDetect-1)+box_src.width)/2);
- box_src.height = (int)((mean_h/(trackQueueDetect-1)+box_src.height)/2);
- }
- actual_box = drawRect(pData, box_src, Scalar(0, 255, 255), offset_x, offset_y, offset_s, img_w, img_h);
- box_src_last = box_src;
- target_width = (int)(actual_box.width*(camera_w/img_w));
- target_height = (int)(actual_box.height*(camera_h/img_h));
- target_x = (int)(actual_box.x*(camera_w/img_w)+target_width/2);
- target_y = (int)(actual_box.y*(camera_h/img_h)+target_height/2);
- printf("box_src.x=%d,box_src.y=%d,box_src.width=%d,box_src.height=%d,target_x=%d,target_y=%d,target_width=%d,target_height=%d
- ", box_src.x, box_src.y, box_src.width, box_src.height, target_x, target_y, target_width, target_height);
- }
- }
- else
- {
- detect_count++;
- if(detect_count > 30)
- {
- detect_count = 0;
- status = HEAD_DETECT;
- offset_x = src_init_x;
- offset_y = src_init_y;
- offset_s = 1;
- }
- }
- break;
- default:
- break;
- }
测试在NCS2的检测结果如下: 速度每秒22帧,这个速度算非常不错了。 总结:使用这个计算棒基本可以代替我目前在3559A上实现的这个项目的需求,而且这个价格还不错,如果能直接购买这个芯片集成到自己的产品中价格那是非常有优势的,后续打算和RK3399搭配使用。