Get started with ORT for C#
Install the NuGet package with the .NET CLI
dotnet add package Microsoft.ML.OnnxRuntime --version 1.16.0
dotnet add package System.Numerics.Tensors --version 0.1.0
Import the libraries
using Microsoft.ML.OnnxRuntime;
using System.Numerics.Tensors;
Create method for inference
This is an Azure Function example that uses ORT with C# for inference on an NLP model created with SciKit Learn. (Besides the ORT namespaces above, the example relies on the standard Azure Functions HTTP-trigger namespaces such as Microsoft.AspNetCore.Http, Microsoft.AspNetCore.Mvc, Microsoft.Extensions.Logging, and Newtonsoft.Json.)
public static async Task<IActionResult> Run(
[HttpTrigger(AuthorizationLevel.Function, "get", "post", Route = null)] HttpRequest req,
ILogger log, ExecutionContext context)
{
log.LogInformation("C# HTTP trigger function processed a request.");
string review = req.Query["review"];
string requestBody = await new StreamReader(req.Body).ReadToEndAsync();
dynamic data = JsonConvert.DeserializeObject(requestBody);
review ??= data.review;
Debug.Assert(!string.IsNullOrEmpty(review), "Expecting a string with content");
// Get path to model to create inference session.
const string modelPath = "./model.onnx";
// Create an InferenceSession from the model path.
// Creating and loading a session is expensive per request;
// in production it should be cached (see the caching sketch after this example).
using var session = new InferenceSession(modelPath);
// create input tensor (nlp example)
using var inputOrtValue = OrtValue.CreateTensorWithEmptyStrings(OrtAllocator.DefaultInstance, new long[] { 1, 1 });
inputOrtValue.StringTensorSetElementAt(review, 0);
// Create input data for session. Request all outputs in this case.
var inputs = new Dictionary<string, OrtValue>
{
{ "input", inputOrtValue }
};
using var runOptions = new RunOptions();
// The model returns a sequence of maps as output; we are interested in the first map of the sequence.
using var outputs = session.Run(runOptions, inputs, session.OutputNames);
Debug.Assert(outputs.Count > 0, "Expecting some output");
// We want the last output, which is the sequence of maps
var lastOutput = outputs[outputs.Count - 1];
// Optional code to check the output type
{
var outputTypeInfo = lastOutput.GetTypeInfo();
Debug.Assert(outputTypeInfo.OnnxType == OnnxValueType.ONNX_TYPE_SEQUENCE, "Expecting a sequence");
var sequenceTypeInfo = outputTypeInfo.SequenceTypeInfo;
Debug.Assert(sequenceTypeInfo.ElementType.OnnxType == OnnxValueType.ONNX_TYPE_MAP, "Expecting a sequence of maps");
}
var elementsNum = lastOutput.GetValueCount();
Debug.Assert(elementsNum > 0, "Expecting a non empty sequence");
// Get the first map in sequence
using var firstMap = lastOutput.GetValue(0, OrtAllocator.DefaultInstance);
// Optional code just checking
{
// Maps always have two elements, keys and values
// We are expecting this to be a map of strings to floats
var mapTypeInfo = firstMap.GetTypeInfo().MapTypeInfo;
Debug.Assert(mapTypeInfo.KeyType == TensorElementType.String, "Expecting keys to be strings");
Debug.Assert(mapTypeInfo.ValueType.OnnxType == OnnxValueType.ONNX_TYPE_TENSOR, "Values are in the tensor");
Debug.Assert(mapTypeInfo.ValueType.TensorTypeAndShapeInfo.ElementDataType == TensorElementType.Float, "Result map value is float");
}
var inferenceResult = new Dictionary<string, float>();
// Let's use the visitor to read map keys and values
// Here keys and values are represented with the same number of corresponding entries
// string -> float
firstMap.ProcessMap((keys, values) => {
// Access native buffer directly
var valuesSpan = values.GetTensorDataAsSpan<float>();
var entryCount = (int)keys.GetTensorTypeAndShape().ElementCount;
inferenceResult.EnsureCapacity(entryCount);
for (int i = 0; i < entryCount; ++i)
{
inferenceResult.Add(keys.GetStringElement(i), valuesSpan[i]);
}
}, OrtAllocator.DefaultInstance);
// Return the inference result as json.
return new JsonResult(inferenceResult);
}
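As the comments above note, creating an InferenceSession on every request is expensive. A minimal caching sketch, assuming the session can be shared across requests (the CachedSession field name and model path are illustrative, not part of the ORT API; InferenceSession is safe for concurrent Run calls):

// Created once per process and reused by every invocation.
// Lazy<T> makes the initialization thread-safe.
private static readonly Lazy<InferenceSession> CachedSession =
    new Lazy<InferenceSession>(() => new InferenceSession("./model.onnx"));

// In the function body, take the shared instance instead of creating one.
// Note: no 'using' here, because the session must outlive the request.
var session = CachedSession.Value;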
Reuse input/output tensor buffers
In some scenarios, you may want to reuse input/output tensors. This often happens when you want to chain two models (i.e. feed the output of one model as input to another), or when you want to accelerate inference speed across multiple inference runs.
Chaining: feed the output of model A as input to model B
using Microsoft.ML.OnnxRuntime.Tensors;
using Microsoft.ML.OnnxRuntime;
namespace Samples
{
class FeedModelAToModelB
{
static void Main()
{
const string modelAPath = "./modelA.onnx";
const string modelBPath = "./modelB.onnx";
using InferenceSession session1 = new InferenceSession(modelAPath);
using InferenceSession session2 = new InferenceSession(modelBPath);
// Illustration only
float[] inputData = { 1, 2, 3, 4 };
long[] inputShape = { 1, 4 };
using var inputOrtValue = OrtValue.CreateTensorValueFromMemory(inputData, inputShape);
// Create input data for session. Request all outputs in this case.
var inputs1 = new Dictionary<string, OrtValue>
{
{ "input", inputOrtValue }
};
using var runOptions = new RunOptions();
// session1 inference
using (var outputs1 = session1.Run(runOptions, inputs1, session1.OutputNames))
{
// get intermediate value
var outputToFeed = outputs1.First();
// Feed model A's output to model B under the input name that model B expects.
// Create input for session2.
var inputs2 = new Dictionary<string, OrtValue>
{
{ "inputNameForModelB", outputToFeed }
};
// session2 inference
using (var results = session2.Run(runOptions, inputs2, session2.OutputNames))
{
// manipulate the results
}
}
}
}
}
Multiple inference runs with fixed-size inputs and outputs
If the model has fixed-size numeric tensor inputs and outputs, use the preferred OrtValue class and its API to accelerate inference and minimize data transfer. The OrtValue class makes it possible to reuse the underlying buffers for the input and output tensors: it pins the managed buffers and uses them for inference, and it provides direct access to the native buffers of the outputs. You can also pre-allocate an OrtValue for the output, or create it on top of an existing buffer. This avoids some overhead, which can be beneficial for smaller models where that time is noticeable in the overall run time.
Keep in mind that the OrtValue class, like many other classes in the OnnxRuntime C# API, is IDisposable. It needs to be properly disposed of to unpin the managed buffers or release the native buffers and avoid memory leaks.
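A minimal sketch of this pattern, assuming a model with a single fixed-size float input named "input" of shape {1, 4} and a single float output named "output" of shape {1, 2} (all names and shapes here are illustrative):

using Microsoft.ML.OnnxRuntime;

// Allocate managed buffers once and reuse them for every run.
float[] inputData = new float[4];
float[] outputData = new float[2];

using var session = new InferenceSession("./model.onnx");
using var runOptions = new RunOptions();

// OrtValue pins the managed arrays; dispose to unpin them.
using var inputOrtValue = OrtValue.CreateTensorValueFromMemory(inputData, new long[] { 1, 4 });
using var outputOrtValue = OrtValue.CreateTensorValueFromMemory(outputData, new long[] { 1, 2 });

string[] inputNames = { "input" };
string[] outputNames = { "output" };

for (int run = 0; run < 10; ++run)
{
    // Refill inputData in place for this run. Run writes the result
    // directly into outputData through the pre-allocated output OrtValue.
    session.Run(runOptions, inputNames, new[] { inputOrtValue },
                outputNames, new[] { outputOrtValue });
    // Read the results from outputData.
}

Because both OrtValues wrap the same managed arrays across all runs, no per-run allocation or copying of tensor data is needed.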
Running on GPU (optional)
If you are using the GPU package, simply use the appropriate SessionOptions when creating an InferenceSession.
int gpuDeviceId = 0; // The GPU device ID to execute on
using var gpuSessionOptions = SessionOptions.MakeSessionOptionWithCudaProvider(gpuDeviceId);
using var session = new InferenceSession("model.onnx", gpuSessionOptions);
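With the DirectML package (Microsoft.ML.OnnxRuntime.DirectML, see the Builds table below), the analogous setup appends the DirectML execution provider instead; a sketch, assuming that package is referenced:

// Device ID 0 selects the default DirectML device.
using var dmlSessionOptions = new SessionOptions();
dmlSessionOptions.AppendExecutionProvider_DML(0);
using var session = new InferenceSession("model.onnx", dmlSessionOptions);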
ONNX Runtime C# API
The ONNX runtime provides a C# .NET binding for running inference on ONNX models on any of the .NET Standard platforms.
Supported versions
.NET standard 1.1
Builds
| Artifact | Description | Supported Platforms |
|---|---|---|
| Microsoft.ML.OnnxRuntime | CPU (Release) | Windows, Linux, Mac, X64, X86 (Windows-only), ARM64 (Windows-only)... more details: compatibility |
| Microsoft.ML.OnnxRuntime.Gpu | GPU - CUDA (Release) | Windows, Linux, Mac, X64... more details: compatibility |
| Microsoft.ML.OnnxRuntime.DirectML | GPU - DirectML (Release) | Windows 10 1709+ |
| onnxruntime | CPU, GPU (Dev), CPU (On-Device Training) | Same as Release versions |
| Microsoft.ML.OnnxRuntime.Training | CPU On-Device Training (Release) | Windows, Linux, Mac, X64, X86 (Windows-only), ARM64 (Windows-only)... more details: compatibility |
API Reference
Samples
See Tutorial: Basics - C#