1 Object Detection: YOLO

Python with OpenCV

Code

import cv2
import numpy as np

CLASSES = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
colors = np.random.uniform(0, 255, size=(len(CLASSES), 3))

def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """
    Draws bounding boxes on the input image based on the provided arguments.

    Args:
        img (numpy.ndarray): The input image to draw the bounding box on.
        class_id (int): Class ID of the detected object.
        confidence (float): Confidence score of the detected object.
        x (int): X-coordinate of the top-left corner of the bounding box.
        y (int): Y-coordinate of the top-left corner of the bounding box.
        x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box.
        y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box.
    """
    label = f'{CLASSES[class_id]} ({confidence:.2f})'
    color = colors[class_id]
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
    cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def main(onnx_model, input_image):
    """
    Main function to load an ONNX model, perform inference, draw bounding boxes, and save the output image.

    Args:
        onnx_model (str): Path to the ONNX model.
        input_image (str): Path to the input image.

    Returns:
        list: List of dictionaries containing detection information such as class_id, class_name, confidence, etc.
    """
    # Load the ONNX model
    model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model)

    # Read the input image
    original_image: np.ndarray = cv2.imread(input_image)
    [height, width, _] = original_image.shape

    # Prepare a square image for inference
    length = max((height, width))
    image = np.zeros((length, length, 3), np.uint8)
    image[0:height, 0:width] = original_image

    # Calculate scale factor
    scale = length / 640

    # Preprocess the image and prepare blob for model
    blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True)
    model.setInput(blob)

    # Perform inference
    outputs = model.forward()

    # Prepare output array
    outputs = np.array([cv2.transpose(outputs[0])])
    rows = outputs.shape[1]

    boxes = []
    scores = []
    class_ids = []

    # Iterate through output to collect bounding boxes, confidence scores, and class IDs
    for i in range(rows):
        classes_scores = outputs[0][i][4:]
        (minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
        if maxScore >= 0.25:
            box = [
                outputs[0][i][0] - (0.5 * outputs[0][i][2]), outputs[0][i][1] - (0.5 * outputs[0][i][3]),
                outputs[0][i][2], outputs[0][i][3]]
            boxes.append(box)
            scores.append(maxScore)
            class_ids.append(maxClassIndex)

    # Apply NMS (Non-maximum suppression)
    result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5)

    detections = []

    # Iterate through NMS results to draw bounding boxes and labels
    for i in range(len(result_boxes)):
        index = result_boxes[i]
        box = boxes[index]
        detection = {
            'class_id': class_ids[index],
            'class_name': CLASSES[class_ids[index]],
            'confidence': scores[index],
            'box': box,
            'scale': scale}
        detections.append(detection)
        draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale),
                          round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale))

    # Save the image with bounding boxes
    cv2.imwrite("result.jpg", original_image)
    print(detections)
    return detections


if __name__ == '__main__':
    main("yolo.onnx", "163420.jpg")

References

GitHub:ultralytics/examples/YOLOv8-OpenCV-ONNX-Python at main · ultralytics/ultralytics (github.com)

Python with onnxruntime

Installing ONNX Runtime

Build

Imports

import onnxruntime
import cv2
import numpy as np

Specify the model path and the execution provider:

oxxn_session = onnxruntime.InferenceSession(r"yolo.onnx", providers=["CUDAExecutionProvider"])
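If no CUDA-enabled onnxruntime build is installed, requesting only CUDAExecutionProvider fails. A minimal sketch of a common fallback pattern (not part of the original code; the provider order is just a suggestion):

providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]  # try CUDA first, then fall back to CPU
oxxn_session = onnxruntime.InferenceSession("yolo.onnx", providers=providers)
print(oxxn_session.get_providers())  # lists the providers that were actually registered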

Inspect the model's input and output layers

# The model has one input layer of shape [1, 3, 640, 640] and one output layer of shape [1, 14, 8400].
for input in oxxn_session.get_inputs():
    print("input name: ", input.name)
    print("input shape: ", input.shape)
    print("input type: ", input.type)

for output in oxxn_session.get_outputs():
    print("output name: ", output.name)
    print("output shape: ", output.shape)
    print("output type: ", output.type)
"""
input name: images
input shape: [1, 3, 640, 640]
input type: tensor(float)
output name: output0
output shape: [1, 14, 8400]
output type: tensor(float)
"""

Data preprocessing

# Preprocess the image into a normalized array of shape (1, 3, 640, 640)
def prepare_input(bgr_image, width, height):
    image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (width, height)).astype(np.float32)
    image = image / 255.0
    # Transpose from (h, w, c) to (c, h, w) => (640, 640, 3) to (3, 640, 640)
    image = np.transpose(image, (2, 0, 1))
    # Add a batch dimension => (3, 640, 640) to (1, 3, 640, 640)
    input_tensor = np.expand_dims(image, axis=0)
    return input_tensor

Feed the data to the model

image = cv2.imread(r'E:\Download\1\081614.jpg')
image_height, image_width, _ = image.shape
model_width, model_height = 640, 640
input_tensor = prepare_input(image, model_width, model_height)
# run(None,{'images':input_tensor})
outputs = oxxn_session.run(None, {oxxn_session.get_inputs()[0].name: input_tensor})

Squeeze and transpose the output

output0 = np.squeeze(outputs[0]).transpose()

print("outputs shape:", output0.shape)
print("outputs[0] shape:", np.array(outputs[0]).shape)
print("output0 shape:", output0.shape)

"""
outputs shape: (8400, 14)
outputs[0] shape: (1, 14, 8400)
output0 shape: (8400, 14)
"""

The detection output has shape (8400, 14): 8400 is the number of candidate boxes the model can produce, and 14 is the number of parameters per box: 4 box coordinates (x, y, w, h) plus 10 class confidences.
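For example, a small sketch (using the output0 array above; the row index 0 is arbitrary) that splits one row into its box geometry and class scores:

# Split one candidate row into box geometry and the 10 class scores (sketch).
row = output0[0]
xc, yc, w, h = row[:4]        # box center and size, in 640x640 model-input pixels
class_scores = row[4:14]      # one confidence per digit class
class_id = int(np.argmax(class_scores))
print(class_id, class_scores[class_id])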

Extracting the boxes

We can look at the parameters of a single candidate box:

boxes = output0[:, 0:14]
print("boxes shape:", boxes.shape)
print("boxes[0]:", boxes[0])
"""
boxes shape: (8400, 14)

array([2.5677011e+00, 4.6934624e+00, 7.8942657e+00, 9.5386562e+00,
7.9870224e-06, 1.1920929e-06, 3.7074089e-05, 4.7683716e-07,
1.4305115e-06, 8.3446503e-07, 2.3841858e-07, 2.2649765e-06,
1.1920929e-07, 9.5367432e-07], dtype=float32)
"""

Computing IoU

def iou(box1, box2):
    return intersection(box1, box2) / union(box1, box2)

def union(box1, box2):
    (box1_x1, box1_y1, box1_x2, box1_y2, _, _) = box1
    (box2_x1, box2_y1, box2_x2, box2_y2, _, _) = box2
    box1_area = (box1_x2 - box1_x1) * (box1_y2 - box1_y1)
    box2_area = (box2_x2 - box2_x1) * (box2_y2 - box2_y1)
    return box1_area + box2_area - intersection(box1, box2)

def intersection(box1, box2):
    (box1_x1, box1_y1, box1_x2, box1_y2, _, _) = box1
    (box2_x1, box2_y1, box2_x2, box2_y2, _, _) = box2
    x1 = max(box1_x1, box2_x1)
    y1 = max(box1_y1, box2_y1)
    x2 = min(box1_x2, box2_x2)
    y2 = min(box1_y2, box2_y2)
    # Clamp to zero so non-overlapping boxes do not produce a spurious positive area
    return max(0.0, x2 - x1) * max(0.0, y2 - y1)

Non-maximum suppression (NMS)

objects = []
NUM_CLASSES = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
for box in boxes:
    prob = box[4:14].max()
    if prob < 0.25:
        continue
    class_id = box[4:14].argmax()
    label = NUM_CLASSES[class_id]
    xc, yc, w, h = box[:4]

    # Map x1, y1, x2, y2 back to the original image coordinates
    x1 = (xc - w / 2) / model_width * image_width
    y1 = (yc - h / 2) / model_height * image_height
    x2 = (xc + w / 2) / model_width * image_width
    y2 = (yc + h / 2) / model_height * image_height

    # Append the box coordinates, label and probability to the objects list
    objects.append((x1, y1, x2, y2, label, prob))

# NMS
objects.sort(key=lambda x: x[5], reverse=True)
results = []
while len(objects) > 0:
    results.append(objects[0])
    objects = [obj for obj in objects if iou(obj, objects[0]) < 0.25]

The final results list holds the detections that survive NMS.

# results[0]
(990.6357421875,49.41483235359192,1024.59326171875,87.05558466911316,'1',0.9102485)
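As a quick sanity check, here is a minimal sketch (assuming image, results and the cv2 import from the snippets above) that draws the surviving boxes onto the original image:

# Draw the NMS survivors back onto the original BGR image (sketch).
for (x1, y1, x2, y2, label, prob) in results:
    p1, p2 = (int(x1), int(y1)), (int(x2), int(y2))
    cv2.rectangle(image, p1, p2, (0, 255, 0), 2)
    cv2.putText(image, f"{label} {prob:.2f}", (p1[0], p1[1] - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
cv2.imwrite("result_onnxruntime.jpg", image)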

References

Zhihu: AI model deployment | A detailed tutorial on deploying YOLOv8 segmentation models with onnxruntime

Rust

Code

Dependencies (Cargo.toml)

serde_json = "1.0"
headers = "0.3"
opencv = {version = "0.86.1"}

image = "0.24.6"
ndarray = "0.15.6"
ort = "1.15.2"

Code:

yolov8n 30ms

yolov8n 200ms

use image::{imageops::FilterType, GenericImageView};
use ndarray::{s, Array, Axis, IxDyn};
use ort::{Environment, SessionBuilder, Value};
use std::{path::Path, sync::Arc, vec, fs::File, io::Read};
use opencv::imgcodecs::{imread, imwrite};

// Handler of /detect POST endpoint
// Receives uploaded file with a name "image_file", passes it
// through YOLOv8 object detection network and returns an array
// of bounding boxes.
// Returns a JSON array of objects bounding boxes in format [(x1,y1,x2,y2,object_type,probability),..]
pub async fn detect() -> String {
    // let field = multipart.next_field().await.unwrap().unwrap();
    // let buf = field.bytes().await.unwrap().to_vec();

    let file = match File::open("assets/LevelScal/origin.jpg") {
        // The Debug representation of `io::Error` describes what went wrong.
        Err(why) => panic!("couldn't open origin.jpg: {:?}", why),
        Ok(file) => file,
    };

    let buf = file.bytes().map(|x| x.unwrap()).collect::<Vec<u8>>();

    println!("buf.len:{}", buf.len());
    let boxes = detect_objects_on_image(buf);
    // println!("boxes: {:#?}", boxes);
    println!("boxes.len:{}", boxes.len());
    println!("{:#}", serde_json::to_string(&boxes).unwrap_or_default());
    return serde_json::to_string(&boxes).unwrap_or_default();
}

// Function receives an image,
// passes it through YOLOv8 neural network
// and returns an array of detected objects
// and their bounding boxes
// Returns Array of bounding boxes in format [(x1,y1,x2,y2,object_type,probability),..]
fn detect_objects_on_image(buf: Vec<u8>) -> Vec<(f32, f32, f32, f32, &'static str, f32)> {
    let (input, img_width, img_height) = prepare_input(buf);
    let output = run_model(input);
    return process_output(output, img_width, img_height);
}

// Function used to convert input image to tensor,
// required as an input to YOLOv8 object detection
// network.
// Returns the input tensor, original image width and height
fn prepare_input(buf: Vec<u8>) -> (Array<f32, IxDyn>, u32, u32) {
    let img = image::load_from_memory(&buf).unwrap();
    let (img_width, img_height) = (img.width(), img.height());
    // Resize the image to exactly 640x640 pixels using the CatmullRom filter.
    let img = img.resize_exact(640, 640, FilterType::CatmullRom);
    // Create a zero-initialized array of shape (1, 3, 640, 640) that will hold the resized image
    // as the network input; into_dyn converts it to a dynamically-dimensioned array.
    let mut input = Array::zeros((1, 3, 640, 640)).into_dyn();
    // Iterate over every pixel of the resized image
    for pixel in img.pixels() {
        // x coordinate (column index) of the current pixel.
        let x = pixel.0 as usize;
        // y coordinate (row index) of the current pixel.
        let y = pixel.1 as usize;
        // Destructure the pixel into its red, green and blue channel values; the alpha channel is ignored.
        let [r, g, b, _] = pixel.2 .0;
        // Convert the red channel to a float, normalize it to [0, 1] and store it at the matching position.
        input[[0, 0, y, x]] = (r as f32) / 255.0;
        // Green channel
        input[[0, 1, y, x]] = (g as f32) / 255.0;
        // Blue channel
        input[[0, 2, y, x]] = (b as f32) / 255.0;
    }
    return (input, img_width, img_height);
}

// Load an ONNX model, run it on the input data and return the processed output
fn run_model(input: Array<f32, IxDyn>) -> Array<f32, IxDyn> {
    // Create an ONNX Runtime environment named "YOLOv8" and wrap it in an Arc
    // (atomically reference-counted pointer) so it can be shared safely across threads.
    let env = Arc::new(Environment::builder().with_name("YOLOv8").build().unwrap());
    // Build a session from the environment and load the model from "yolov8x_best.onnx";
    // this assumes the model file exists and is valid.
    let model = SessionBuilder::new(&env)
        .unwrap()
        .with_model_from_file("assets/models/yolov8x_best.onnx")
        .unwrap();
    // Convert the input array to standard (contiguous) memory layout, as required by ONNX Runtime.
    let input_as_values = &input.as_standard_layout();
    // Build a one-element vector wrapping the input data in the Value type understood by ONNX Runtime.
    let model_inputs = vec![Value::from_array(model.allocator(), input_as_values).unwrap()];
    // Run the model and collect the outputs, assuming the run succeeds.
    let outputs = model.run(model_inputs).unwrap();
    // Take the first element of the model output (a model may have several outputs).
    // try_extract::<f32>() extracts the output as an f32 array;
    // view().t().into_owned() transposes the view and converts it into an owned array.
    let output = outputs
        .get(0)
        .unwrap()
        .try_extract::<f32>()
        .unwrap()
        .view()
        .t()
        .into_owned();
    return output;
}

/**
Processes the output of an object detection model such as YOLO and extracts the detected
objects, including their position, label and confidence.
It also applies non-maximum suppression (via IoU filtering) to remove overlapping boxes.
*/
fn process_output(
    output: Array<f32, IxDyn>,
    img_width: u32,
    img_height: u32,
) -> Vec<(f32, f32, f32, f32, &'static str, f32)> {
    // Mutable vector that collects the detected boxes.
    let mut boxes = Vec::new();
    // Slice the output array to drop the batch dimension; the exact slicing depends on the model's output layout.
    let output = output.slice(s![.., .., 0]);
    // Iterate over the first axis of the output (row by row).
    for row in output.axis_iter(Axis(0)) {
        // Collect the row iterator into a vector.
        let row: Vec<_> = row.iter().map(|x| *x).collect();
        // Skip the first four elements of the row, enumerate the rest as (index, value) pairs,
        // and use reduce to find the class with the highest probability.
        let (class_id, prob) = row
            .iter()
            .skip(4)
            .enumerate()
            .map(|(index, value)| (index, *value))
            .reduce(|accum, row| if row.1 > accum.1 { row } else { accum })
            .unwrap();
        // Skip this row if the probability is below 0.5 (the detection is not confident enough).
        if prob < 0.5 {
            continue;
        }
        let label = YOLO_CLASSES[class_id];
        let xc = row[0] / 640.0 * (img_width as f32);
        let yc = row[1] / 640.0 * (img_height as f32);
        let w = row[2] / 640.0 * (img_width as f32);
        let h = row[3] / 640.0 * (img_height as f32);
        let x1 = xc - w / 2.0;
        let x2 = xc + w / 2.0;
        let y1 = yc - h / 2.0;
        let y2 = yc + h / 2.0;
        // Push the box coordinates, label and probability onto the boxes vector.
        boxes.push((x1, y1, x2, y2, label, prob));
    }
    // Sort the boxes by probability in descending order.
    boxes.sort_by(|box1, box2| box2.5.total_cmp(&box1.5));
    let mut result = Vec::new();
    while boxes.len() > 0 {
        // Keep the box with the highest probability.
        result.push(boxes[0]);
        // Use IoU (intersection over union) to drop boxes that overlap the kept box too much.
        boxes = boxes
            .iter()
            .filter(|box1| iou(&boxes[0], box1) < 0.7)
            .map(|x| *x)
            .collect()
    }
    return result;
}

// Function calculates "Intersection-over-union" coefficient for specified two boxes
// https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/.
// Returns Intersection over union ratio as a float number
fn iou(
    box1: &(f32, f32, f32, f32, &'static str, f32),
    box2: &(f32, f32, f32, f32, &'static str, f32),
) -> f32 {
    return intersection(box1, box2) / union(box1, box2);
}

// Function calculates union area of two boxes
// Returns Area of the boxes union as a float number
fn union(
    box1: &(f32, f32, f32, f32, &'static str, f32),
    box2: &(f32, f32, f32, f32, &'static str, f32),
) -> f32 {
    let (box1_x1, box1_y1, box1_x2, box1_y2, _, _) = *box1;
    let (box2_x1, box2_y1, box2_x2, box2_y2, _, _) = *box2;
    let box1_area = (box1_x2 - box1_x1) * (box1_y2 - box1_y1);
    let box2_area = (box2_x2 - box2_x1) * (box2_y2 - box2_y1);
    return box1_area + box2_area - intersection(box1, box2);
}

// Function calculates intersection area of two boxes
// Returns Area of intersection of the boxes as a float number
fn intersection(
    box1: &(f32, f32, f32, f32, &'static str, f32),
    box2: &(f32, f32, f32, f32, &'static str, f32),
) -> f32 {
    let (box1_x1, box1_y1, box1_x2, box1_y2, _, _) = *box1;
    let (box2_x1, box2_y1, box2_x2, box2_y2, _, _) = *box2;
    let x1 = box1_x1.max(box2_x1);
    let y1 = box1_y1.max(box2_y1);
    let x2 = box1_x2.min(box2_x2);
    let y2 = box1_y2.min(box2_y2);
    // Clamp to zero so non-overlapping boxes do not yield a spurious positive area.
    return (x2 - x1).max(0.0) * (y2 - y1).max(0.0);
}

// Array of YOLOv8 class labels
const YOLO_CLASSES: [&str; 10] = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"];

References

github:AndreyGermanov/yolov8_onnx_rust: YOLOv8 inference using Rust (github.com)

GitHub:yolov5-api-rust/src/model.rs at

GitHub:yolo-screen-brightness/src/main.rs

C#

Tutorial: Detect objects using an ONNX deep learning model - ML.NET | Microsoft Learn

2 Semantic Segmentation: U-Net

Python with OpenCV

Code

import cv2
import numpy as np

# Read the image
image = cv2.imread(r"C:\Users\DELL\Desktop\rule\1.jpg")
# Convert the BGR image to RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Get the image size (height, width)
ori_h, ori_w = image.shape[0], image.shape[1]
print(ori_h, ori_w)
# Target size after resizing
new_width = 512
new_height = 512
# Resize the image
resized_img = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
# Load the ONNX model
net = cv2.dnn.readNetFromONNX(r"C:\D\Work\Water\20231023RiverSpeed\Vue\RiverSpeed_Rust\assets\models\unet_last.onnx")  # load the trained segmentation model
# An ONNX model can have several outputs, each with its own name, so collect all output names
output_layer_names = net.getUnconnectedOutLayersNames()
# Normalization parameters
mean = np.array([0.485, 0.456, 0.406]) * 255.0
scale = 1 / 255.0
std = [0.229, 0.224, 0.225]
# Build the input blob from the image: resizing, normalization, etc.
input_blob = cv2.dnn.blobFromImage(
    image=resized_img,
    scalefactor=scale,
    size=(512, 512),  # img target size
    mean=mean,
    swapRB=True,  # BGR -> RGB
    crop=True  # center crop
)

# Set the blob as the model input
net.setInput(input_blob)
# Run the forward pass, passing all output names as arguments
out = net.forward(output_layer_names)

def process_output(output, original_image):
    # Remove the redundant batch dimension
    output = output[0].squeeze()

    # Class (0 or 1) of every pixel
    labels = np.argmax(output, axis=0)

    # Create a mask for each class
    mask_0 = np.zeros_like(original_image)
    mask_1 = np.zeros_like(original_image)

    # Red for class 0, green for class 1
    mask_0[labels == 0] = [0, 0, 255]  # red
    mask_1[labels == 1] = [0, 255, 0]  # green

    # Blend the masks with the original image
    alpha = 0.5  # transparency
    segmented_image = cv2.addWeighted(original_image, 1, mask_0, alpha, 0)
    segmented_image = cv2.addWeighted(segmented_image, 1, mask_1, alpha, 0)

    return segmented_image

segmented_image = process_output(out, resized_img)

segmented_image = cv2.resize(segmented_image, (ori_w, ori_h), interpolation=cv2.INTER_AREA)

# Save the image
cv2.imwrite('saved_opencv_python_image.png', segmented_image)

Python with onnxruntime

import copy
import onnxruntime
from PIL import Image
import cv2
import numpy as np

#---------------------------------------------------#
#   Resize the input image
#---------------------------------------------------#
def resize_image(image, size):
    iw, ih = image.size
    w, h = size

    scale = min(w/iw, h/ih)   # 512/2560 = 0.2, 512/1440 = 0.35
    nw = int(iw*scale)        # 2560*0.2 = 512
    nh = int(ih*scale)        # 1440*0.2 = 288

    image = image.resize((nw, nh), Image.BICUBIC)  # (512, 288)
    # image.save("unet_resize_1.jpg")
    new_image = Image.new('RGB', size, (128, 128, 128))  # (512, 512)
    new_image.paste(image, ((w-nw)//2, (h-nh)//2))  # paste into the center: (512-512)//2 = 0, (512-288)//2 = 112

    return new_image, nw, nh

def get_input_feed(image_tensor):
    # Build the input feed keyed by the model's input names
    input_feed = {}
    for name in input_name:
        input_feed[name] = image_tensor
    return input_feed

onnx_session = onnxruntime.InferenceSession("unet.onnx")

input_name = [node.name for node in onnx_session.get_inputs()]
output_name = [node.name for node in onnx_session.get_outputs()]

print(input_name)   # ['images']
print(output_name)  # ['output']

input_shape = [512, 512]

num_classes = 2

image = Image.open("163420.jpg")
#---------------------------------------------------#
#   Keep a copy of the input image for drawing later
#---------------------------------------------------#
old_img = copy.deepcopy(image)
original_h = np.array(image).shape[0]
original_w = np.array(image).shape[1]
#---------------------------------------------------------#
#   Pad the image with gray bars for a distortion-free resize
#   (a plain resize would also work)
#---------------------------------------------------------#
image_data, nw, nh = resize_image(image, (input_shape[0], input_shape[1]))
# image_data.save("unet_size_change.jpg")
print(np.shape(image_data))  # (512, 512, 3)
#---------------------------------------------------------#
#   Add a batch dimension
#---------------------------------------------------------#
image_data = np.expand_dims(np.transpose((np.array(image_data, np.float32)/255.0), (2, 0, 1)), 0)
print(np.shape(image_data))  # (1, 3, 512, 512)

input_feed = get_input_feed(image_data)
pr = onnx_session.run(output_names=output_name, input_feed=input_feed)[0][0]
print(np.shape(pr))  # (2, 512, 512)

#---------------------------------------------------#
#   Determine the class of every pixel
#---------------------------------------------------#
def softmax(x, axis):
    # Take the maximum over the class axis while keeping dimensions => (512, 512, 1),
    # then subtract it for numerical stability
    x -= np.max(x, axis=axis, keepdims=True)
    # Exponentiate and normalize, keeping dimensions => (512, 512, 2)
    f_x = np.exp(x) / np.sum(np.exp(x), axis=axis, keepdims=True)
    return f_x

pr = softmax(np.transpose(pr, (1, 2, 0)), -1)
print(np.shape(pr))  # (512, 512, 2)
#--------------------------------------#
#   Crop away the gray bars
#--------------------------------------#
pr = pr[int((input_shape[0] - nh) // 2) : int((input_shape[0] - nh) // 2 + nh), \
        int((input_shape[1] - nw) // 2) : int((input_shape[1] - nw) // 2 + nw)]
print(np.shape(pr))  # (288, 512, 2)
#---------------------------------------------------#
#   Resize back to the original image size
#---------------------------------------------------#
pr = cv2.resize(pr, (original_w, original_h), interpolation=cv2.INTER_LINEAR)
print(np.shape(pr))  # (1440, 2560, 2)
#---------------------------------------------------#
#   Take the most likely class of every pixel
#---------------------------------------------------#
pr = pr.argmax(axis=-1)
print(np.shape(pr))  # (1440, 2560)

colors = [(0, 0, 0), (128, 0, 0), (0, 128, 0)]

seg_img = np.reshape(np.array(colors, np.uint8)[np.reshape(pr, [-1])], [original_h, original_w, -1])
#------------------------------------------------#
#   Convert the new image back to a PIL Image
#------------------------------------------------#
image = Image.fromarray(np.uint8(seg_img))
image.save("unet2.png")
#------------------------------------------------#
#   Blend the new image with the original image
#------------------------------------------------#
image = Image.blend(old_img, image, 0.7)

image.save("unet.png")

References

OpenCV official documentation: OpenCV: Conversion of PyTorch Classification Models and Launch with OpenCV Python

OpenCV official sample: opencv/samples/dnn/classification.py at master · opencv/opencv (github.com)

CSDN blog: [Deep Learning] [OpenCV] [CPU] Calling an ONNX model from Python/C++ [Basics]

Rust

This part covers reading an image with OpenCV, resizing, model loading, image preprocessing, Mat construction, Mat reshaping, modifying Mat values, and weighted-overlay (blending) operations.

Background reading: the attributes of OpenCV's Mat

If you use OpenCV: Mat to ArrayBase

https://docs.rs/ort/latest/ort/

Code

use opencv::core::{self, MatTrait, MatTraitConst, Scalar, Vector, CV_32F, CV_32FC1, CV_8UC3};
use opencv::dnn::{blob_from_image, read_net_from_onnx, NetTrait, NetTraitConst};
use opencv::imgcodecs::{imread, imwrite};
use opencv::prelude::*;

use std::process::exit;

pub async fn detect() -> String {
    let image = imread("assets/unet/1.jpg", opencv::imgcodecs::IMREAD_COLOR).unwrap();
    // let mut image_rgb = Mat::default();
    // opencv::imgproc::cvt_color(&image, &mut image_rgb, COLOR_BGR2RGB, 0);

    // Get the image size
    let origin_height = image.size().unwrap().height;
    let origin_width = image.size().unwrap().width;

    // Target size after resizing
    let new_height = 512;
    let new_width = 512;

    println!("h: {}, w: {}", origin_height, origin_width);

    // Resize the image
    let mut resized_img = Mat::default();
    opencv::imgproc::resize(
        &image,
        &mut resized_img,
        core::Size_ {
            width: new_width,
            height: new_height,
        },
        0.0,
        0.0,
        opencv::imgproc::INTER_AREA,
    )
    .unwrap();

    // Load the model
    let mut network = read_net_from_onnx("assets/models/unet_last.onnx").unwrap();
    // An ONNX model can have several outputs, each with its own name, so collect all output names
    let output_layer_names = network.get_unconnected_out_layers_names().unwrap();

    // Normalization parameters
    let mean = Scalar::new(0.485, 0.456, 0.406, 0.0) * 255.0;
    let scale = 1.0 / 255.0;
    // let std = Scalar::new(0.229, 0.224, 0.225, 0.0);

    let blob = blob_from_image(
        &resized_img,
        scale,
        opencv::core::Size_ {
            width: new_width,
            height: new_height,
        },
        mean,
        true,
        false,
        CV_32F,
    )
    .unwrap_or_else(|e| {
        eprintln!("Something went wrong: {}", e);
        exit(1)
    });

    network
        .set_input(&blob, "", 1.0, core::Scalar::default())
        .unwrap();

    let mut outs: Vector<Mat> = Vector::default();

    network
        .forward(&mut outs, &output_layer_names)
        .unwrap_or_else(|e| {
            eprintln!("Something went wrong: {}", e);
            exit(1)
        });

    // dbg!(&outs);

    // let size = outs.get(0).unwrap().size().unwrap();
    // println!("size: {:#?}", size);
    // let mut scharrx = core::Mat::new_rows_cols_with_data(size.height, size.width, CV_8UC1, outs.ptr(0), Mat_AUTO_STEP).unwrap();

    let last_row_norm = process_output(&outs, &resized_img);

    return serde_json::to_string(&last_row_norm).unwrap_or_default();
}

/// Process the model predictions: build the per-pixel label map and the colored masks.
fn process_output(output: &core::Vector<core::Mat>, origin_image: &core::Mat) -> f32 {
    let mut labels = Mat::default();
    let output_4d = output.get(0).unwrap();

    let output_3d = output_4d.reshape_nd(0, &[2, 512, 512]).unwrap();

    // let mut output_3d_0 = output_3d.

    unsafe { labels.create_nd(&[512, 512], CV_32FC1).unwrap() };

    for i in 0..512 {
        for j in 0..512 {
            let temp = output_3d.at_nd::<f32>(&[0, i, j]).unwrap();
            let temp1 = output_3d.at_nd::<f32>(&[1, i, j]).unwrap();
            let tt = labels.at_2d_mut::<f32>(i, j).unwrap();
            if *temp > *temp1 {
                *tt = 0.0;
            } else {
                *tt = 1.0;
            }
        }
    }

    let mut mask0 = Mat::default();
    unsafe { mask0.create_nd(&[512, 512], CV_8UC3).unwrap() };

    let mut mask1 = mask0.clone();

    let mut last_row_id = 0;
    for i in 0..512 {
        for j in 0..512 {
            let t = labels.at_2d::<f32>(i, j).unwrap();
            if *t == 0.0 {
                // Color the background region
                // bgr
                let temp = mask0.at_2d_mut::<core::Vec3b>(i, j).unwrap();
                temp[0] = 0;   // blue
                temp[1] = 255; // green
                temp[2] = 0;   // red
            } else {
                // Color the detected region
                last_row_id = i;
                let temp = mask1.at_2d_mut::<core::Vec3b>(i, j).unwrap();
                temp[0] = 0;
                temp[1] = 255; // green
                temp[2] = 0;
            }
        }
    }
    let last_row_norm = last_row_id as f32 / 512.0;

    let mut segmented_image = Mat::default();
    let mut origin_image_8u = Mat::default();

    origin_image
        .convert_to(&mut origin_image_8u, CV_8UC3, 1.0, 0.0)
        .unwrap();

    opencv::core::add_weighted(
        &origin_image_8u,
        1.0,
        &mask0,
        0.5,
        0.0,
        &mut segmented_image,
        -1,
    )
    .unwrap();
    opencv::core::add_weighted(
        &origin_image_8u,
        1.0,
        &mask1,
        0.5,
        0.0,
        &mut segmented_image,
        -1,
    )
    .unwrap();

    opencv::imgproc::resize(
        &segmented_image,
        &mut origin_image_8u,
        core::Size_ {
            width: 1080,
            height: 1920,
        },
        0.0,
        0.0,
        opencv::imgproc::INTER_AREA,
    )
    .unwrap();

    let v = core::Vector::new();

    match imwrite("mask0.jpg", &mask0, &v) {
        Ok(value) => {
            if value == false {
                println!("Failed to save the image");
            } else {
                println!("Saved the image successfully");
            }
        }
        Err(e) => {
            println!("Failed to save the image: {}", e);
        }
    };

    return last_row_norm;
}

References

CSDN: Understanding OpenCV3 channels and bit depth and the corresponding Vec types (depth' is 1 (cv_8s))

GitHub C++:Text-Detection/main.cpp at main · Kurmangozhin/Text-Detection (github.com)

GitHub C++:Easy_Unet/main.cpp at master · dgl547437235/Easy_Unet (github.com)

GitHub Rust at_2d:Code search results (github.com)

GitHub Rust at_nd:Code search results (github.com)