
tensorrt

The first step with TensorRT is to define the model network. You can either import a trained model through one of TensorRT's parser libraries, e.g. Caffe, UFF (used for TensorFlow), or ONNX, or define the network yourself with the TensorRT API.

C++

1. Import a model network with a parser: a. create a TensorRT builder and network (in that order; it cannot be reversed); b. create a TensorRT parser for the specific model format; c. use the parser to parse the model and populate the network.

ex. Import a Caffe model

	IBuilder* builder = createInferBuilder(gLogger);
	INetworkDefinition* network = builder->createNetwork(); // create the builder and network
	ICaffeParser* parser = createCaffeParser(); // create the parser
	const IBlobNameToTensor* blobNameToTensor = parser->parse("deploy_file",
		"modelFile", *network, DataType::kFLOAT); // parse the model and populate the network

ex. Import a TensorFlow (UFF) model

	IBuilder* builder = createInferBuilder(gLogger);
	INetworkDefinition* network = builder->createNetwork(); // create the builder and network
	IUffParser* parser = createUffParser(); // create the parser
	parser->registerInput("Input_0", DimsCHW(1, 28, 28), UffInputOrder::kNCHW);
	parser->registerOutput("Binary_3"); // declare the network's input and output
	parser->parse(uffFile, *network, nvinfer1::DataType::kFLOAT); // parse the model and populate the network

ex. Import an ONNX model

	nvonnxparser::IOnnxConfig* config = nvonnxparser::createONNXConfig(); // create the parser config
	nvonnxparser::IONNXParser* parser = nvonnxparser::createONNXParser(*config); // create the parser
	parser->parse(onnx_filename, DataType::kFLOAT); // parse the ONNX model
	parser->convertToTRTNetwork(); // convert the parsed model into a TensorRT network
	nvinfer1::INetworkDefinition* trtNetwork = parser->getTRTNetwork(); // retrieve the converted network from the parser


2. Create a network with the TensorRT API

ex. Use the API to build a network with input, convolution, pooling, fully connected, activation, and softmax layers

	IBuilder* builder = createInferBuilder(gLogger);
	INetworkDefinition* network = builder->createNetwork(); // create the builder and network
	auto data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{1, INPUT_H, INPUT_W}); // add the network input

	// each layer's output tensor is retrieved with layerName->getOutput(0)
	auto conv1 = network->addConvolution(*data, 20, DimsHW{5, 5}, weightMap["conv1filter"], weightMap["conv1bias"]);
	conv1->setStride(DimsHW{1, 1}); // add a convolution layer
	auto pool1 = network->addPooling(*conv1->getOutput(0), PoolingType::kMAX, DimsHW{2, 2});
	pool1->setStride(DimsHW{2, 2}); // add a pooling layer
	auto ip1 = network->addFullyConnected(*pool1->getOutput(0), 500, weightMap["ip1filter"], weightMap["ip1bias"]); // add a fully connected layer
	auto relu1 = network->addActivation(*ip1->getOutput(0), ActivationType::kRELU); // add a ReLU activation layer
	auto prob = network->addSoftMax(*relu1->getOutput(0)); // add a softmax layer
	prob->getOutput(0)->setName(OUTPUT_BLOB_NAME);
	network->markOutput(*prob->getOutput(0)); // mark the softmax output as the network output

3. Build the engine

	builder->setMaxBatchSize(maxBatchSize); // the largest batch size the engine will support
	builder->setMaxWorkspaceSize(1 << 20);  // scratch space available to layers during optimization
	ICudaEngine* engine = builder->buildCudaEngine(*network); // build the engine
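Once the engine has been built, the network and builder are no longer needed. With this pre-8.x API they can be released explicitly, mirroring the destroy() calls the Python section uses below:

	network->destroy();
	builder->destroy();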

4. Serialize the model

	IHostMemory *serializedModel = engine->serialize();
	// store model to disk
	serializedModel->destroy();

	IRuntime* runtime = createInferRuntime(gLogger);
	ICudaEngine* engine = runtime->deserializeCudaEngine(modelData, modelSize, nullptr); // create a runtime object to deserialize the engine
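The "store model to disk" step is elided above. A minimal sketch using standard file streams (the file name model.trt is just an example; requires <fstream> and <vector>):

	// write the serialized engine out
	std::ofstream out("model.trt", std::ios::binary);
	out.write(static_cast<const char*>(serializedModel->data()), serializedModel->size());
	out.close();

	// read it back into the modelData/modelSize buffer that deserializeCudaEngine expects
	std::ifstream in("model.trt", std::ios::binary | std::ios::ate);
	size_t modelSize = in.tellg();
	in.seekg(0, std::ios::beg);
	std::vector<char> modelData(modelSize);
	in.read(modelData.data(), modelSize);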

5. Run inference

	IExecutionContext *context = engine->createExecutionContext(); // the context is created from the engine and holds the state needed to run it
	int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
	int outputIndex = engine->getBindingIndex(OUTPUT_BLOB_NAME); // look up the input and output binding indices by blob name
	void* buffers[2];
	buffers[inputIndex] = inputBuffer;
	buffers[outputIndex] = outputBuffer; // set the GPU buffer for each binding index
	context->enqueue(batchSize, buffers, stream, nullptr); // enqueue the inference work on the CUDA stream
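The snippet assumes inputBuffer and outputBuffer already point to device memory and that a CUDA stream exists. A minimal sketch of those pieces (OUTPUT_SIZE, hostInput, and hostOutput are placeholders; requires <cuda_runtime.h>):

	size_t inputSize = batchSize * INPUT_H * INPUT_W * sizeof(float);
	size_t outputSize = batchSize * OUTPUT_SIZE * sizeof(float);
	cudaMalloc(&buffers[inputIndex], inputSize);   // device input buffer
	cudaMalloc(&buffers[outputIndex], outputSize); // device output buffer

	cudaStream_t stream;
	cudaStreamCreate(&stream);
	cudaMemcpyAsync(buffers[inputIndex], hostInput, inputSize, cudaMemcpyHostToDevice, stream); // copy the input to the GPU
	context->enqueue(batchSize, buffers, stream, nullptr);
	cudaMemcpyAsync(hostOutput, buffers[outputIndex], outputSize, cudaMemcpyDeviceToHost, stream); // copy the result back
	cudaStreamSynchronize(stream); // wait for inference and copies to finish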

Python

1. Parse a model network with a parser

ex. Import a Caffe model

	import tensorrt as trt  # import the TensorRT package
	datatype = trt.infer.DataType.FLOAT  # use float32
	MODEL_PROTOTXT = '/data/mnist/mnist.prototxt'
	CAFFE_MODEL = '/data/mnist/mnist.caffemodel'  # paths to the prototxt and caffemodel files
	builder = trt.infer.create_infer_builder(G_LOGGER)  # create the builder
	network = builder.create_network()  # create the network
	parser = parsers.caffeparser.create_caffe_parser()  # create the Caffe parser
	blob_name_to_tensor = parser.parse(CAFFE_MODEL, MODEL_PROTOTXT, network, datatype)  # parse the model and populate the network

ex. Import a TensorFlow model

	import tensorrt as trt
	from tensorrt.parsers import uffparser  # import the relevant packages
	import uff
	uff_model = uff.from_tensorflow_frozen_model(frozen_file, ["fc2/Relu"])  # convert the frozen graph to UFF
	parser = uffparser.create_uff_parser()  # create the parser
	parser.register_input("Placeholder", (1, 28, 28), 0)
	parser.register_output("fc2/Relu")  # register the parser's input and output
	engine = trt.utils.uff_to_trt_engine(G_LOGGER,
	                                     uff_model,
	                                     parser,
	                                     MAX_BATCHSIZE,
	                                     MAX_WORKSPACE)  # build the engine from the UFF model

2. Create a network with the Python API

	builder = trt.infer.create_infer_builder(G_LOGGER)
	network = builder.create_network()  # create the builder and network
	data = network.add_input(INPUT_LAYERS[0], dt, (1, INPUT_H, INPUT_W))  # add the network input
	weight_map = trt.utils.load_weights(weights_file)  # load the weight map

	conv1 = network.add_convolution(data, 20, (5, 5), weight_map["conv1filter"], weight_map["conv1bias"])
	conv1.set_stride((1, 1))  # add a convolution layer
	pool1 = network.add_pooling(conv1.get_output(0), trt.infer.PoolingType.MAX, (2, 2))
	pool1.set_stride((2, 2))  # add a pooling layer

	ip1 = network.add_fully_connected(pool1.get_output(0), 500, weight_map["ip1filter"], weight_map["ip1bias"])
	relu1 = network.add_activation(ip1.get_output(0), trt.infer.ActivationType.RELU)  # add a fully connected layer and a ReLU activation

	prob = network.add_softmax(relu1.get_output(0))
	prob.get_output(0).set_name(OUTPUT_LAYERS[0])
	network.mark_output(prob.get_output(0))  # add the softmax layer and mark its output as the network output

3. Build the engine

	builder.set_max_batch_size(max_batch_size)  # the largest batch size the engine will support
	builder.set_max_workspace_size(1 << 20)  # scratch space available to layers during optimization
	engine = builder.build_cuda_engine(network)  # build the engine

4. Serialize and deserialize the model

	modelstream = engine.serialize()  # serialize the engine
	engine.destroy()
	builder.destroy()

	runtime = trt.infer.create_infer_runtime(G_LOGGER)
	engine = runtime.deserialize_cuda_engine(modelstream.data(), modelstream.size(), None)  # deserialize
	modelstream.destroy()

5. Run inference

	import pycuda.driver as cuda  # PyCUDA manages the device memory and stream
	import pycuda.autoinit

	context = engine.create_execution_context()
	d_input = cuda.mem_alloc(insize)    # allocate the device input buffer
	d_output = cuda.mem_alloc(outsize)  # allocate the device output buffer
	bindings = [int(d_input), int(d_output)]
	stream = cuda.Stream()
	cuda.memcpy_htod_async(d_input, input_data, stream)  # copy the host input array (input_data) to the GPU
	context.enqueue(batch_size, bindings, stream.handle, None)  # enqueue inference on the stream
	cuda.memcpy_dtoh_async(output, d_output, stream)  # copy the result back to the host
	stream.synchronize()
	return output
