OpenVINO 2022.3, Part 8: OpenVINO Async API
OpenVINO Async API is a programming interface provided by the OpenVINO toolkit for asynchronous inference of deep learning models. It allows developers to execute multiple inference requests concurrently and optimize the utilization of hardware resources.
The OpenVINO Inference Request API provides both synchronous and asynchronous execution. ov::InferRequest::infer() is inherently synchronous and simple to use. The asynchronous API "splits" infer() into ov::InferRequest::start_async() and ov::InferRequest::wait() (or a completion callback). While the synchronous API may be easier to start with, the asynchronous API is recommended for production code because it implements flow control for any number of requests.
C++ sample code:
Image classification example, adapted from the official OpenVINO samples:
#include <sys/stat.h> #include <condition_variable> #include <fstream> #include <map> #include <memory> #include <mutex> #include <string> #include <vector> // clang-format off #include "openvino/openvino.hpp" #include "format_reader_ptr.h" #include <iostream> #include <string> #include <vector> constexpr auto N_TOP_RESULTS = 10; using namespace ov::preprocess; using namespace std; int main() { try { // -------- Get OpenVINO Runtime version -------- cout << ov::get_openvino_version() << endl; // -------- Read input -------- std::string model_file("E:\weight\openvino\public\googlenet-v1\FP32\googlenet-v1.xml"); std::string image_path("E:\images"); std::vector<std::string> image_names; image_names.push_back("E:\images\car.bmp"); image_names.push_back("E:\images\car.bmp"); image_names.push_back("E:\images\car.bmp"); image_names.push_back("E:\images\car.bmp"); // -------- Step 1. Initialize OpenVINO Runtime Core -------- ov::Core core; // -------- Step 2. Read a model -------- cout << "Loading model files:" << endl << model_file << endl; std::shared_ptr<ov::Model> model = core.read_model(model_file); // OPENVINO_ASSERT(model->inputs().size() == 1, "Sample supports models with 1 input only"); // OPENVINO_ASSERT(model->outputs().size() == 1, "Sample supports models with 1 output only"); // -------- Step 3. 
Configure preprocessing -------- const ov::Layout tensor_layout{ "NHWC" }; ov::preprocess::PrePostProcessor ppp(model); // 1) input() with no args assumes a model has a single input ov::preprocess::InputInfo & input_info = ppp.input(); // 2) Set input tensor information: // - precision of tensor is supposed to be 'u8' // - layout of data is 'NHWC' input_info.tensor().set_element_type(ov::element::u8).set_layout(tensor_layout); // 3) Here we suppose model has 'NCHW' layout for input input_info.model().set_layout("NCHW"); // 4) output() with no args assumes a model has a single result // - output() with no args assumes a model has a single result // - precision of tensor is supposed to be 'f32' ppp.output().tensor().set_element_type(ov::element::f32); // 5) Once the build() method is called, the pre(post)processing steps // for layout and precision conversions are inserted automatically model = ppp. build(); // -------- Step 4. read input images -------- cout << "Read input images" << endl; ov::Shape input_shape = model->input().get_shape(); const size_t width = input_shape[ov::layout::width_idx(tensor_layout)]; const size_t height = input_shape[ov::layout::height_idx(tensor_layout)]; std::vector<std::shared_ptr<unsigned char>> images_data; std::vector<std::string> valid_image_names; for (const auto & i : image_names) { FormatReader::ReaderPtr reader(i.c_str()); if (reader. get() == nullptr) { cout << "Image " + i + " cannot be read!" << endl; continue; } //Collect image data std::shared_ptr<unsigned char> data(reader->getData(width, height)); if (data != nullptr) { images_data.push_back(data); valid_image_names.push_back(i); } } if (images_data.empty() || valid_image_names.empty()) throw std::logic_error("Valid input images were not found!"); // -------- Step 5. Loading model to the device -------- // Setting batch size using image count const size_t batchSize = images_data. 
size(); cout << "Set batch size " << std::to_string(batchSize) << endl; ov::set_batch(model, batchSize); // -------- Step 6. Loading model to the device -------- cout << "Loading model to the device " << endl; ov::CompiledModel compiled_model = core.compile_model(model, "CPU"); // -------- Step 7. Create infer request -------- cout << "Create infer request" << endl; ov::InferRequest infer_request = compiled_model.create_infer_request(); // -------- Step 8. Combine multiple input images as batch -------- ov::Tensor input_tensor = infer_request.get_input_tensor(); for (size_t image_id = 0; image_id < images_data. size(); + + image_id) { const size_t image_size = shape_size(model->input().get_shape()) / batchSize; std::memcpy(input_tensor.data<std::uint8_t>() + image_id * image_size, images_data[image_id].get(), image_size); } // -------- Step 9. Do asynchronous inference -------- size_t num_iterations = 10; size_t cur_iteration = 0; std::condition_variable condVar; std::mutex mutex; std::exception_ptr exception_var; // -------- Step 10. Do asynchronous inference -------- infer_request.set_callback([ & amp;](std::exception_ptr ex) { std::lock_guard<std::mutex> l(mutex); if (ex) { exception_var = ex; condVar. notify_all(); return; } cur_iteration++; cout << "Completed " << cur_iteration << " async request execution" << endl; if (cur_iteration < num_iterations) { // here a user can read output containing inference results and put new // input to repeat async request again infer_request.start_async(); } else { // continue sample execution after last Asynchronous inference request // execution condVar. 
notify_one(); } }); // Start async request for the first time cout << "Start inference (asynchronous executions)" << endl; infer_request.start_async(); // Wait all iterations of the async request std::unique_lock<std::mutex> lock(mutex); condVar.wait(lock, [ & amp;] { if (exception_var) { std::rethrow_exception(exception_var); } return cur_iteration == num_iterations; }); cout << "Completed async requests execution" << endl; // -------- Step 11. Process output -------- ov::Tensor output = infer_request.get_output_tensor(); } catch (const std::exception & ex) { cout << ex.what() << endl; return EXIT_FAILURE; } catch (...) { cout << "Unknown/internal exception happened." << endl; return EXIT_FAILURE; } return EXIT_SUCCESS; }
Python sample code:
YOLOv5 object detection. If the weight file or helper code is missing, get them from the ultralytics/yolov5 repository.
import cv2
import math
import numpy as np
import time
from typing import Tuple
import torchvision
import yaml
import torch
from openvino.runtime import Core, Tensor

# Helpers from the ultralytics/yolov5 repository; run this script from the
# repository root so that `utils` is importable.
# NOTE(review): newer YOLOv5 releases appear to have renamed `scale_coords`
# to `scale_boxes` - adjust the import if it fails on your checkout.
from utils.augmentations import letterbox
from utils.general import non_max_suppression, scale_coords

# Load COCO Label from yolov5/data/coco.yaml
# NOTE(review): yaml.FullLoader can construct more than plain data types;
# only point this at a trusted file (yaml.safe_load is enough for a label map).
with open('./data/coco.yaml', 'r', encoding='utf-8') as f:
    result = yaml.load(f.read(), Loader=yaml.FullLoader)
class_list = result['names']

# Step1: Create OpenVINO Runtime Core
core = Core()
# Step2: Compile the Model for the dedicated device: CPU/GPU.0/GPU.1...
net = core.compile_model("./weights/yolov5s_openvino_model/yolov5s.xml", "CPU")

# get input node and output node
input_node = net.inputs[0]
output_node = net.outputs[0]

# Step 3. Create 1 Infer_request for current frame, 1 for next frame
infer_request_curr = net.create_infer_request()
infer_request_next = net.create_infer_request()

# color palette
colors = [(255, 255, 0), (0, 255, 0), (0, 255, 255), (255, 0, 0)]

image_paths = ["./images/bus.jpg", "./images/zidane.jpg"]

# Get the current frame
frame_curr = cv2.imread(image_paths[0])
# Preprocess the frame
letterbox_img_curr, _, _ = letterbox(frame_curr, auto=False)
# Normalization + Swap RB + Layout from HWC to NCHW
blob = Tensor(cv2.dnn.blobFromImage(letterbox_img_curr, 1 / 255.0, swapRB=True))
# Transfer the blob into the model
infer_request_curr.set_tensor(input_node, blob)
# Start the current frame Async Inference
infer_request_curr.start_async()

for idx in range(100):
    # Calculate the end-to-end process throughput.
    start = time.time()

    # Get the next frame
    frame_next = cv2.imread(image_paths[idx % len(image_paths)])
    # Preprocess the frame
    letterbox_img_next, _, _ = letterbox(frame_next, auto=False)
    # Normalization + Swap RB + Layout from HWC to NCHW
    blob = Tensor(cv2.dnn.blobFromImage(letterbox_img_next, 1 / 255.0, swapRB=True))
    # Transfer the blob into the model
    infer_request_next.set_tensor(input_node, blob)
    # Start the next frame Async Inference; it runs while the current
    # frame's result is post-processed below.
    infer_request_next.start_async()

    # wait for the current frame inference result
    infer_request_curr.wait()
    # Get the inference result from the output_node
    infer_result = infer_request_curr.get_tensor(output_node)

    # Postprocess the inference result
    data = torch.tensor(infer_result.data)
    # Postprocess of YOLOv5: NMS
    dets = non_max_suppression(data)[0].numpy()
    bboxes, scores, class_ids = dets[:, :4], dets[:, 4], dets[:, 5]
    # rescale the coordinates from the letterboxed image back to the frame
    bboxes = scale_coords(letterbox_img_curr.shape[:-1], bboxes, frame_curr.shape[:-1]).astype(int)

    # show bbox of detections
    for bbox, score, class_id in zip(bboxes, scores, class_ids):
        # class ids come out of the detection array as floats; convert once
        # so they are valid indices for both `colors` and `class_list`.
        class_idx = int(class_id)
        color = colors[class_idx % len(colors)]
        cv2.rectangle(frame_curr, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 2)
        cv2.rectangle(frame_curr, (bbox[0], bbox[1] - 20), (bbox[2], bbox[1]), color, -1)
        cv2.putText(frame_curr, class_list[class_idx], (bbox[0], bbox[1] - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255))

    end = time.time()
    # show FPS
    fps = (1 / (end - start))
    fps_label = "Throughput: %.2f FPS" % fps
    cv2.putText(frame_curr, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
    print(fps_label + "; Detections: " + str(len(class_ids)))
    cv2.imshow("Async API demo", frame_curr)

    # Swap the infer request: the request started for the next frame
    # becomes the one awaited in the following iteration.
    infer_request_curr, infer_request_next = infer_request_next, infer_request_curr
    frame_curr = frame_next
    letterbox_img_curr = letterbox_img_next

    # wait key for ending
    if cv2.waitKey(1) > -1:
        print("finished by user")
        break

# After the swap one request is still in flight; let it finish before the
# script exits, and close the preview window on every exit path.
infer_request_curr.wait()
cv2.destroyAllWindows()