1 Object Detection: YOLO

Python with OpenCV

Code

import cv2
import numpy as np

CLASSES = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
colors = np.random.uniform(0, 255, size=(len(CLASSES), 3))

def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """
    Draws bounding boxes on the input image based on the provided arguments.

    Args:
        img (numpy.ndarray): The input image to draw the bounding box on.
        class_id (int): Class ID of the detected object.
        confidence (float): Confidence score of the detected object.
        x (int): X-coordinate of the top-left corner of the bounding box.
        y (int): Y-coordinate of the top-left corner of the bounding box.
        x_plus_w (int): X-coordinate of the bottom-right corner of the bounding box.
        y_plus_h (int): Y-coordinate of the bottom-right corner of the bounding box.
    """
    label = f'{CLASSES[class_id]} ({confidence:.2f})'
    color = colors[class_id]
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
    cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def main(onnx_model, input_image):
    """
    Main function to load an ONNX model, perform inference, draw bounding boxes, and save the output image.

    Args:
        onnx_model (str): Path to the ONNX model.
        input_image (str): Path to the input image.

    Returns:
        list: List of dictionaries containing detection information such as class_id, class_name, confidence, etc.
    """
    # Load the ONNX model
    model: cv2.dnn.Net = cv2.dnn.readNetFromONNX(onnx_model)

    # Read the input image
    original_image: np.ndarray = cv2.imread(input_image)
    [height, width, _] = original_image.shape

    # Prepare a square image for inference
    length = max((height, width))
    image = np.zeros((length, length, 3), np.uint8)
    image[0:height, 0:width] = original_image

    # Calculate scale factor
    scale = length / 640

    # Preprocess the image and prepare blob for model
    blob = cv2.dnn.blobFromImage(image, scalefactor=1 / 255, size=(640, 640), swapRB=True)
    model.setInput(blob)

    # Perform inference
    outputs = model.forward()

    # Prepare output array
    outputs = np.array([cv2.transpose(outputs[0])])
    rows = outputs.shape[1]

    boxes = []
    scores = []
    class_ids = []

    # Iterate through output to collect bounding boxes, confidence scores, and class IDs
    for i in range(rows):
        classes_scores = outputs[0][i][4:]
        (minScore, maxScore, minClassLoc, (x, maxClassIndex)) = cv2.minMaxLoc(classes_scores)
        if maxScore >= 0.25:
            box = [
                outputs[0][i][0] - (0.5 * outputs[0][i][2]), outputs[0][i][1] - (0.5 * outputs[0][i][3]),
                outputs[0][i][2], outputs[0][i][3]]
            boxes.append(box)
            scores.append(maxScore)
            class_ids.append(maxClassIndex)

    # Apply NMS (Non-maximum suppression)
    result_boxes = cv2.dnn.NMSBoxes(boxes, scores, 0.25, 0.45, 0.5)

    detections = []

    # Iterate through NMS results to draw bounding boxes and labels
    for i in range(len(result_boxes)):
        index = result_boxes[i]
        box = boxes[index]
        detection = {
            'class_id': class_ids[index],
            'class_name': CLASSES[class_ids[index]],
            'confidence': scores[index],
            'box': box,
            'scale': scale}
        detections.append(detection)
        draw_bounding_box(original_image, class_ids[index], scores[index], round(box[0] * scale), round(box[1] * scale),
                          round((box[0] + box[2]) * scale), round((box[1] + box[3]) * scale))

    # Save the image with bounding boxes
    cv2.imwrite("result.jpg", original_image)
    print(detections)
    return detections


if __name__ == '__main__':
    main("yolo.onnx", "163420.jpg")

References

GitHub:ultralytics/examples/YOLOv8-OpenCV-ONNX-Python at main · ultralytics/ultralytics (github.com)

Python with onnxruntime

Installing ONNX Runtime

Build

Imports

import onnxruntime
import cv2
import numpy as np

Specify the model path and the execution provider:

oxxn_session = onnxruntime.InferenceSession(r"yolo.onnx", providers=["CUDAExecutionProvider"])
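If no CUDA-enabled onnxruntime build is installed, requesting only CUDAExecutionProvider fails. A minimal sketch of a common fallback pattern (not part of the original code; the provider order is just a suggestion):

providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]  # try CUDA first, then fall back to CPU
oxxn_session = onnxruntime.InferenceSession("yolo.onnx", providers=providers)
print(oxxn_session.get_providers())  # lists the providers that were actually registered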

Inspect the model's input and output layers

# The model has one input layer of shape [1, 3, 640, 640] and one output layer of shape [1, 14, 8400].
for input in oxxn_session.get_inputs():
    print("input name: ", input.name)
    print("input shape: ", input.shape)
    print("input type: ", input.type)

for output in oxxn_session.get_outputs():
    print("output name: ", output.name)
    print("output shape: ", output.shape)
    print("output type: ", output.type)
"""
input name: images
input shape: [1, 3, 640, 640]
input type: tensor(float)
output name: output0
output shape: [1, 14, 8400]
output type: tensor(float)
"""

Data preprocessing

# Preprocess the image into a normalized array of shape (1, 3, 640, 640)
def prepare_input(bgr_image, width, height):
    image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (width, height)).astype(np.float32)
    image = image / 255.0
    # Transpose from (h, w, c) to (c, h, w) => (640, 640, 3) to (3, 640, 640)
    image = np.transpose(image, (2, 0, 1))
    # Add a batch dimension => (3, 640, 640) to (1, 3, 640, 640)
    input_tensor = np.expand_dims(image, axis=0)
    return input_tensor

Feed the data to the model

image = cv2.imread(r'E:\Download\1\081614.jpg')
image_height, image_width, _ = image.shape
model_width, model_height = 640, 640
input_tensor = prepare_input(image, model_width, model_height)
# run(None,{'images':input_tensor})
outputs = oxxn_session.run(None, {oxxn_session.get_inputs()[0].name: input_tensor})

Squeeze and transpose the output

output0 = np.squeeze(outputs[0]).transpose()

print("outputs shape:", output0.shape)
print("outputs[0] shape:", np.array(outputs[0]).shape)
print("output0 shape:", output0.shape)

"""
outputs shape: (8400, 14)
outputs[0] shape: (1, 14, 8400)
output0 shape: (8400, 14)
"""

The detection output has shape (8400, 14): 8400 is the number of candidate boxes the model can produce, and 14 is the number of parameters per box: 4 box coordinates (x, y, w, h) plus 10 class confidences.
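For example, a small sketch (using the output0 array above; the row index 0 is arbitrary) that splits one row into its box geometry and class scores:

# Split one candidate row into box geometry and the 10 class scores (sketch).
row = output0[0]
xc, yc, w, h = row[:4]        # box center and size, in 640x640 model-input pixels
class_scores = row[4:14]      # one confidence per digit class
class_id = int(np.argmax(class_scores))
print(class_id, class_scores[class_id])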

Extracting the boxes

We can look at the parameters of a single candidate box:

boxes = output0[:, 0:14]
print("boxes shape:", boxes.shape)
print("boxes[0]:", boxes[0])
"""
boxes shape: (8400, 14)

array([2.5677011e+00, 4.6934624e+00, 7.8942657e+00, 9.5386562e+00,
7.9870224e-06, 1.1920929e-06, 3.7074089e-05, 4.7683716e-07,
1.4305115e-06, 8.3446503e-07, 2.3841858e-07, 2.2649765e-06,
1.1920929e-07, 9.5367432e-07], dtype=float32)
"""

Computing IoU

def iou(box1, box2):
    return intersection(box1, box2) / union(box1, box2)

def union(box1, box2):
    (box1_x1, box1_y1, box1_x2, box1_y2, _, _) = box1
    (box2_x1, box2_y1, box2_x2, box2_y2, _, _) = box2
    box1_area = (box1_x2 - box1_x1) * (box1_y2 - box1_y1)
    box2_area = (box2_x2 - box2_x1) * (box2_y2 - box2_y1)
    return box1_area + box2_area - intersection(box1, box2)

def intersection(box1, box2):
    (box1_x1, box1_y1, box1_x2, box1_y2, _, _) = box1
    (box2_x1, box2_y1, box2_x2, box2_y2, _, _) = box2
    x1 = max(box1_x1, box2_x1)
    y1 = max(box1_y1, box2_y1)
    x2 = min(box1_x2, box2_x2)
    y2 = min(box1_y2, box2_y2)
    # Clamp to zero so non-overlapping boxes do not produce a spurious positive area
    return max(0.0, x2 - x1) * max(0.0, y2 - y1)

Non-maximum suppression (NMS)

objects = []
NUM_CLASSES = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
for box in boxes:
    prob = box[4:14].max()
    if prob < 0.25:
        continue
    class_id = box[4:14].argmax()
    label = NUM_CLASSES[class_id]
    xc, yc, w, h = box[:4]

    # Map x1, y1, x2, y2 back to the original image coordinates
    x1 = (xc - w / 2) / model_width * image_width
    y1 = (yc - h / 2) / model_height * image_height
    x2 = (xc + w / 2) / model_width * image_width
    y2 = (yc + h / 2) / model_height * image_height

    # Append the box coordinates, label and probability to the objects list
    objects.append((x1, y1, x2, y2, label, prob))

# NMS
objects.sort(key=lambda x: x[5], reverse=True)
results = []
while len(objects) > 0:
    results.append(objects[0])
    objects = [obj for obj in objects if iou(obj, objects[0]) < 0.25]

The final results list holds the detections that survive NMS.

# results[0]
(990.6357421875,49.41483235359192,1024.59326171875,87.05558466911316,'1',0.9102485)
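As a quick sanity check, here is a minimal sketch (assuming image, results and the cv2 import from the snippets above) that draws the surviving boxes onto the original image:

# Draw the NMS survivors back onto the original BGR image (sketch).
for (x1, y1, x2, y2, label, prob) in results:
    p1, p2 = (int(x1), int(y1)), (int(x2), int(y2))
    cv2.rectangle(image, p1, p2, (0, 255, 0), 2)
    cv2.putText(image, f"{label} {prob:.2f}", (p1[0], p1[1] - 5),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
cv2.imwrite("result_onnxruntime.jpg", image)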

References

Zhihu: AI model deployment | A detailed tutorial on deploying YOLOv8 segmentation models with onnxruntime

Rust

Code

Dependencies (Cargo.toml)

serde_json = "1.0"
headers = "0.3"
opencv = {version = "0.86.1"}

image = "0.24.6"
ndarray = "0.15.6"
ort = "1.15.2"

Code:

yolov8n 30ms

yolov8n 200ms

use image::{imageops::FilterType, GenericImageView};
use ndarray::{s, Array, Axis, IxDyn};
use ort::{Environment, SessionBuilder, Value};
use std::{path::Path, sync::Arc, vec, fs::File, io::Read};
use opencv::imgcodecs::{imread, imwrite};

// Handler of /detect POST endpoint
// Receives uploaded file with a name "image_file", passes it
// through YOLOv8 object detection network and returns an array
// of bounding boxes.
// Returns a JSON array of objects bounding boxes in format [(x1,y1,x2,y2,object_type,probability),..]
pub async fn detect() -> String {
    // let field = multipart.next_field().await.unwrap().unwrap();
    // let buf = field.bytes().await.unwrap().to_vec();

    let file = match File::open("assets/LevelScal/origin.jpg") {
        // The Debug representation of `io::Error` describes what went wrong.
        Err(why) => panic!("couldn't open origin.jpg: {:?}", why),
        Ok(file) => file,
    };

    let buf = file.bytes().map(|x| x.unwrap()).collect::<Vec<u8>>();

    println!("buf.len:{}", buf.len());
    let boxes = detect_objects_on_image(buf);
    // println!("boxes: {:#?}", boxes);
    println!("boxes.len:{}", boxes.len());
    println!("{:#}", serde_json::to_string(&boxes).unwrap_or_default());
    return serde_json::to_string(&boxes).unwrap_or_default();
}

// Function receives an image,
// passes it through YOLOv8 neural network
// and returns an array of detected objects
// and their bounding boxes
// Returns Array of bounding boxes in format [(x1,y1,x2,y2,object_type,probability),..]
fn detect_objects_on_image(buf: Vec<u8>) -> Vec<(f32, f32, f32, f32, &'static str, f32)> {
    let (input, img_width, img_height) = prepare_input(buf);
    let output = run_model(input);
    return process_output(output, img_width, img_height);
}

// Function used to convert input image to tensor,
// required as an input to YOLOv8 object detection
// network.
// Returns the input tensor, original image width and height
fn prepare_input(buf: Vec<u8>) -> (Array<f32, IxDyn>, u32, u32) {
    let img = image::load_from_memory(&buf).unwrap();
    let (img_width, img_height) = (img.width(), img.height());
    // Resize the image to exactly 640x640 pixels using the CatmullRom filter.
    let img = img.resize_exact(640, 640, FilterType::CatmullRom);
    // Create a zero-initialized array of shape (1, 3, 640, 640) that will hold the resized image
    // as the network input; into_dyn converts it to a dynamically-dimensioned array.
    let mut input = Array::zeros((1, 3, 640, 640)).into_dyn();
    // Iterate over every pixel of the resized image
    for pixel in img.pixels() {
        // x coordinate (column index) of the current pixel.
        let x = pixel.0 as usize;
        // y coordinate (row index) of the current pixel.
        let y = pixel.1 as usize;
        // Destructure the pixel into its red, green and blue channel values; the alpha channel is ignored.
        let [r, g, b, _] = pixel.2 .0;
        // Convert the red channel to a float, normalize it to [0, 1] and store it at the matching position.
        input[[0, 0, y, x]] = (r as f32) / 255.0;
        // Green channel
        input[[0, 1, y, x]] = (g as f32) / 255.0;
        // Blue channel
        input[[0, 2, y, x]] = (b as f32) / 255.0;
    }
    return (input, img_width, img_height);
}

// Load an ONNX model, run it on the input data and return the processed output
fn run_model(input: Array<f32, IxDyn>) -> Array<f32, IxDyn> {
    // Create an ONNX Runtime environment named "YOLOv8" and wrap it in an Arc
    // (atomically reference-counted pointer) so it can be shared safely across threads.
    let env = Arc::new(Environment::builder().with_name("YOLOv8").build().unwrap());
    // Build a session from the environment and load the model from "yolov8x_best.onnx";
    // this assumes the model file exists and is valid.
    let model = SessionBuilder::new(&env)
        .unwrap()
        .with_model_from_file("assets/models/yolov8x_best.onnx")
        .unwrap();
    // Convert the input array to standard (contiguous) memory layout, as required by ONNX Runtime.
    let input_as_values = &input.as_standard_layout();
    // Build a one-element vector wrapping the input data in the Value type understood by ONNX Runtime.
    let model_inputs = vec![Value::from_array(model.allocator(), input_as_values).unwrap()];
    // Run the model and collect the outputs, assuming the run succeeds.
    let outputs = model.run(model_inputs).unwrap();
    // Take the first element of the model output (a model may have several outputs).
    // try_extract::<f32>() extracts the output as an f32 array;
    // view().t().into_owned() transposes the view and converts it into an owned array.
    let output = outputs
        .get(0)
        .unwrap()
        .try_extract::<f32>()
        .unwrap()
        .view()
        .t()
        .into_owned();
    return output;
}

/**
Processes the output of an object detection model such as YOLO and extracts the detected
objects, including their position, label and confidence.
It also applies non-maximum suppression (via IoU filtering) to remove overlapping boxes.
*/
fn process_output(
    output: Array<f32, IxDyn>,
    img_width: u32,
    img_height: u32,
) -> Vec<(f32, f32, f32, f32, &'static str, f32)> {
    // Mutable vector that collects the detected boxes.
    let mut boxes = Vec::new();
    // Slice the output array to drop the batch dimension; the exact slicing depends on the model's output layout.
    let output = output.slice(s![.., .., 0]);
    // Iterate over the first axis of the output (row by row).
    for row in output.axis_iter(Axis(0)) {
        // Collect the row iterator into a vector.
        let row: Vec<_> = row.iter().map(|x| *x).collect();
        // Skip the first four elements of the row, enumerate the rest as (index, value) pairs,
        // and use reduce to find the class with the highest probability.
        let (class_id, prob) = row
            .iter()
            .skip(4)
            .enumerate()
            .map(|(index, value)| (index, *value))
            .reduce(|accum, row| if row.1 > accum.1 { row } else { accum })
            .unwrap();
        // Skip this row if the probability is below 0.5 (the detection is not confident enough).
        if prob < 0.5 {
            continue;
        }
        let label = YOLO_CLASSES[class_id];
        let xc = row[0] / 640.0 * (img_width as f32);
        let yc = row[1] / 640.0 * (img_height as f32);
        let w = row[2] / 640.0 * (img_width as f32);
        let h = row[3] / 640.0 * (img_height as f32);
        let x1 = xc - w / 2.0;
        let x2 = xc + w / 2.0;
        let y1 = yc - h / 2.0;
        let y2 = yc + h / 2.0;
        // Push the box coordinates, label and probability onto the boxes vector.
        boxes.push((x1, y1, x2, y2, label, prob));
    }
    // Sort the boxes by probability in descending order.
    boxes.sort_by(|box1, box2| box2.5.total_cmp(&box1.5));
    let mut result = Vec::new();
    while boxes.len() > 0 {
        // Keep the box with the highest probability.
        result.push(boxes[0]);
        // Use IoU (intersection over union) to drop boxes that overlap the kept box too much.
        boxes = boxes
            .iter()
            .filter(|box1| iou(&boxes[0], box1) < 0.7)
            .map(|x| *x)
            .collect()
    }
    return result;
}

// Function calculates "Intersection-over-union" coefficient for specified two boxes
// https://pyimagesearch.com/2016/11/07/intersection-over-union-iou-for-object-detection/.
// Returns Intersection over union ratio as a float number
fn iou(
    box1: &(f32, f32, f32, f32, &'static str, f32),
    box2: &(f32, f32, f32, f32, &'static str, f32),
) -> f32 {
    return intersection(box1, box2) / union(box1, box2);
}

// Function calculates union area of two boxes
// Returns Area of the boxes union as a float number
fn union(
    box1: &(f32, f32, f32, f32, &'static str, f32),
    box2: &(f32, f32, f32, f32, &'static str, f32),
) -> f32 {
    let (box1_x1, box1_y1, box1_x2, box1_y2, _, _) = *box1;
    let (box2_x1, box2_y1, box2_x2, box2_y2, _, _) = *box2;
    let box1_area = (box1_x2 - box1_x1) * (box1_y2 - box1_y1);
    let box2_area = (box2_x2 - box2_x1) * (box2_y2 - box2_y1);
    return box1_area + box2_area - intersection(box1, box2);
}

// Function calculates intersection area of two boxes
// Returns Area of intersection of the boxes as a float number
fn intersection(
    box1: &(f32, f32, f32, f32, &'static str, f32),
    box2: &(f32, f32, f32, f32, &'static str, f32),
) -> f32 {
    let (box1_x1, box1_y1, box1_x2, box1_y2, _, _) = *box1;
    let (box2_x1, box2_y1, box2_x2, box2_y2, _, _) = *box2;
    let x1 = box1_x1.max(box2_x1);
    let y1 = box1_y1.max(box2_y1);
    let x2 = box1_x2.min(box2_x2);
    let y2 = box1_y2.min(box2_y2);
    // Clamp to zero so non-overlapping boxes do not yield a spurious positive area.
    return (x2 - x1).max(0.0) * (y2 - y1).max(0.0);
}

// Array of YOLOv8 class labels
const YOLO_CLASSES: [&str; 10] = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"];

References

github:AndreyGermanov/yolov8_onnx_rust: YOLOv8 inference using Rust (github.com)

GitHub:yolov5-api-rust/src/model.rs at

GitHub:yolo-screen-brightness/src/main.rs

C#

Tutorial: Detect objects using an ONNX deep learning model - ML.NET | Microsoft Learn

2 Semantic Segmentation: U-Net

Python with OpenCV

Code

import cv2
import numpy as np

# Read the image
image = cv2.imread(r"C:\Users\DELL\Desktop\rule\1.jpg")
# Convert the BGR image to RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Get the image size (height, width)
ori_h, ori_w = image.shape[0], image.shape[1]
print(ori_h, ori_w)
# Target size after resizing
new_width = 512
new_height = 512
# Resize the image
resized_img = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
# Load the ONNX model
net = cv2.dnn.readNetFromONNX(r"C:\D\Work\Water\20231023RiverSpeed\Vue\RiverSpeed_Rust\assets\models\unet_last.onnx")  # load the trained segmentation model
# An ONNX model can have several outputs, each with its own name, so collect all output names
output_layer_names = net.getUnconnectedOutLayersNames()
# Normalization parameters
mean = np.array([0.485, 0.456, 0.406]) * 255.0
scale = 1 / 255.0
std = [0.229, 0.224, 0.225]
# Build the input blob from the image: resizing, normalization, etc.
input_blob = cv2.dnn.blobFromImage(
    image=resized_img,
    scalefactor=scale,
    size=(512, 512),  # img target size
    mean=mean,
    swapRB=True,  # BGR -> RGB
    crop=True  # center crop
)

# Set the blob as the model input
net.setInput(input_blob)
# Run the forward pass, passing all output names as arguments
out = net.forward(output_layer_names)

def process_output(output, original_image):
    # Remove the redundant batch dimension
    output = output[0].squeeze()

    # Class (0 or 1) of every pixel
    labels = np.argmax(output, axis=0)

    # Create a mask for each class
    mask_0 = np.zeros_like(original_image)
    mask_1 = np.zeros_like(original_image)

    # Red for class 0, green for class 1
    mask_0[labels == 0] = [0, 0, 255]  # red
    mask_1[labels == 1] = [0, 255, 0]  # green

    # Blend the masks with the original image
    alpha = 0.5  # transparency
    segmented_image = cv2.addWeighted(original_image, 1, mask_0, alpha, 0)
    segmented_image = cv2.addWeighted(segmented_image, 1, mask_1, alpha, 0)

    return segmented_image

segmented_image = process_output(out, resized_img)

segmented_image = cv2.resize(segmented_image, (ori_w, ori_h), interpolation=cv2.INTER_AREA)

# Save the image
cv2.imwrite('saved_opencv_python_image.png', segmented_image)

Python with onnxruntime

import copy
import onnxruntime
from PIL import Image
import cv2
import numpy as np

#---------------------------------------------------#
#   Resize the input image
#---------------------------------------------------#
def resize_image(image, size):
    iw, ih = image.size
    w, h = size

    scale = min(w/iw, h/ih)   # 512/2560 = 0.2, 512/1440 = 0.35
    nw = int(iw*scale)        # 2560*0.2 = 512
    nh = int(ih*scale)        # 1440*0.2 = 288

    image = image.resize((nw, nh), Image.BICUBIC)  # (512, 288)
    # image.save("unet_resize_1.jpg")
    new_image = Image.new('RGB', size, (128, 128, 128))  # (512, 512)
    new_image.paste(image, ((w-nw)//2, (h-nh)//2))  # paste into the center: (512-512)//2 = 0, (512-288)//2 = 112

    return new_image, nw, nh

def get_input_feed(image_tensor):
    # Build the input feed keyed by the model's input names
    input_feed = {}
    for name in input_name:
        input_feed[name] = image_tensor
    return input_feed

onnx_session = onnxruntime.InferenceSession("unet.onnx")

input_name = [node.name for node in onnx_session.get_inputs()]
output_name = [node.name for node in onnx_session.get_outputs()]

print(input_name)   # ['images']
print(output_name)  # ['output']

input_shape = [512, 512]

num_classes = 2

image = Image.open("163420.jpg")
#---------------------------------------------------#
#   Keep a copy of the input image for drawing later
#---------------------------------------------------#
old_img = copy.deepcopy(image)
original_h = np.array(image).shape[0]
original_w = np.array(image).shape[1]
#---------------------------------------------------------#
#   Pad the image with gray bars for a distortion-free resize
#   (a plain resize would also work)
#---------------------------------------------------------#
image_data, nw, nh = resize_image(image, (input_shape[0], input_shape[1]))
# image_data.save("unet_size_change.jpg")
print(np.shape(image_data))  # (512, 512, 3)
#---------------------------------------------------------#
#   Add a batch dimension
#---------------------------------------------------------#
image_data = np.expand_dims(np.transpose((np.array(image_data, np.float32)/255.0), (2, 0, 1)), 0)
print(np.shape(image_data))  # (1, 3, 512, 512)

input_feed = get_input_feed(image_data)
pr = onnx_session.run(output_names=output_name, input_feed=input_feed)[0][0]
print(np.shape(pr))  # (2, 512, 512)

#---------------------------------------------------#
#   Determine the class of every pixel
#---------------------------------------------------#
def softmax(x, axis):
    # Take the maximum over the class axis while keeping dimensions => (512, 512, 1),
    # then subtract it for numerical stability
    x -= np.max(x, axis=axis, keepdims=True)
    # Exponentiate and normalize, keeping dimensions => (512, 512, 2)
    f_x = np.exp(x) / np.sum(np.exp(x), axis=axis, keepdims=True)
    return f_x

pr = softmax(np.transpose(pr, (1, 2, 0)), -1)
print(np.shape(pr))  # (512, 512, 2)
#--------------------------------------#
#   Crop away the gray bars
#--------------------------------------#
pr = pr[int((input_shape[0] - nh) // 2) : int((input_shape[0] - nh) // 2 + nh), \
        int((input_shape[1] - nw) // 2) : int((input_shape[1] - nw) // 2 + nw)]
print(np.shape(pr))  # (288, 512, 2)
#---------------------------------------------------#
#   Resize back to the original image size
#---------------------------------------------------#
pr = cv2.resize(pr, (original_w, original_h), interpolation=cv2.INTER_LINEAR)
print(np.shape(pr))  # (1440, 2560, 2)
#---------------------------------------------------#
#   Take the most likely class of every pixel
#---------------------------------------------------#
pr = pr.argmax(axis=-1)
print(np.shape(pr))  # (1440, 2560)

colors = [(0, 0, 0), (128, 0, 0), (0, 128, 0)]

seg_img = np.reshape(np.array(colors, np.uint8)[np.reshape(pr, [-1])], [original_h, original_w, -1])
#------------------------------------------------#
#   Convert the new image back to a PIL Image
#------------------------------------------------#
image = Image.fromarray(np.uint8(seg_img))
image.save("unet2.png")
#------------------------------------------------#
#   Blend the new image with the original image
#------------------------------------------------#
image = Image.blend(old_img, image, 0.7)

image.save("unet.png")

References

OpenCV official documentation: OpenCV: Conversion of PyTorch Classification Models and Launch with OpenCV Python

OpenCV official sample: opencv/samples/dnn/classification.py at master · opencv/opencv (github.com)

CSDN blog: [Deep Learning] [OpenCV] [CPU] Calling an ONNX model from Python/C++ [Basics]

Rust

This part covers reading an image with OpenCV, resizing, model loading, image preprocessing, Mat construction, Mat reshaping, modifying Mat values, and weighted-overlay (blending) operations.

Background reading: the attributes of OpenCV's Mat

If you use OpenCV: Mat to ArrayBase

https://docs.rs/ort/latest/ort/

Code

use opencv::core::{self, MatTrait, MatTraitConst, Scalar, Vector, CV_32F, CV_32FC1, CV_8UC3};
use opencv::dnn::{blob_from_image, read_net_from_onnx, NetTrait, NetTraitConst};
use opencv::imgcodecs::{imread, imwrite};
use opencv::prelude::*;

use std::process::exit;

pub async fn detect() -> String {
    let image = imread("assets/unet/1.jpg", opencv::imgcodecs::IMREAD_COLOR).unwrap();
    // let mut image_rgb = Mat::default();
    // opencv::imgproc::cvt_color(&image, &mut image_rgb, COLOR_BGR2RGB, 0);

    // Get the image size
    let origin_height = image.size().unwrap().height;
    let origin_width = image.size().unwrap().width;

    // Target size after resizing
    let new_height = 512;
    let new_width = 512;

    println!("h: {}, w: {}", origin_height, origin_width);

    // Resize the image
    let mut resized_img = Mat::default();
    opencv::imgproc::resize(
        &image,
        &mut resized_img,
        core::Size_ {
            width: new_width,
            height: new_height,
        },
        0.0,
        0.0,
        opencv::imgproc::INTER_AREA,
    )
    .unwrap();

    // Load the model
    let mut network = read_net_from_onnx("assets/models/unet_last.onnx").unwrap();
    // An ONNX model can have several outputs, each with its own name, so collect all output names
    let output_layer_names = network.get_unconnected_out_layers_names().unwrap();

    // Normalization parameters
    let mean = Scalar::new(0.485, 0.456, 0.406, 0.0) * 255.0;
    let scale = 1.0 / 255.0;
    // let std = Scalar::new(0.229, 0.224, 0.225, 0.0);

    let blob = blob_from_image(
        &resized_img,
        scale,
        opencv::core::Size_ {
            width: new_width,
            height: new_height,
        },
        mean,
        true,
        false,
        CV_32F,
    )
    .unwrap_or_else(|e| {
        eprintln!("Something went wrong: {}", e);
        exit(1)
    });

    network
        .set_input(&blob, "", 1.0, core::Scalar::default())
        .unwrap();

    let mut outs: Vector<Mat> = Vector::default();

    network
        .forward(&mut outs, &output_layer_names)
        .unwrap_or_else(|e| {
            eprintln!("Something went wrong: {}", e);
            exit(1)
        });

    // dbg!(&outs);

    // let size = outs.get(0).unwrap().size().unwrap();
    // println!("size: {:#?}", size);
    // let mut scharrx = core::Mat::new_rows_cols_with_data(size.height, size.width, CV_8UC1, outs.ptr(0), Mat_AUTO_STEP).unwrap();

    let last_row_norm = process_output(&outs, &resized_img);

    return serde_json::to_string(&last_row_norm).unwrap_or_default();
}

/// Process the model predictions: build the per-pixel label map and the colored masks.
fn process_output(output: &core::Vector<core::Mat>, origin_image: &core::Mat) -> f32 {
    let mut labels = Mat::default();
    let output_4d = output.get(0).unwrap();

    let output_3d = output_4d.reshape_nd(0, &[2, 512, 512]).unwrap();

    // let mut output_3d_0 = output_3d.

    unsafe { labels.create_nd(&[512, 512], CV_32FC1).unwrap() };

    for i in 0..512 {
        for j in 0..512 {
            let temp = output_3d.at_nd::<f32>(&[0, i, j]).unwrap();
            let temp1 = output_3d.at_nd::<f32>(&[1, i, j]).unwrap();
            let tt = labels.at_2d_mut::<f32>(i, j).unwrap();
            if *temp > *temp1 {
                *tt = 0.0;
            } else {
                *tt = 1.0;
            }
        }
    }

    let mut mask0 = Mat::default();
    unsafe { mask0.create_nd(&[512, 512], CV_8UC3).unwrap() };

    let mut mask1 = mask0.clone();

    let mut last_row_id = 0;
    for i in 0..512 {
        for j in 0..512 {
            let t = labels.at_2d::<f32>(i, j).unwrap();
            if *t == 0.0 {
                // Color the background region
                // bgr
                let temp = mask0.at_2d_mut::<core::Vec3b>(i, j).unwrap();
                temp[0] = 0;   // blue
                temp[1] = 255; // green
                temp[2] = 0;   // red
            } else {
                // Color the detected region
                last_row_id = i;
                let temp = mask1.at_2d_mut::<core::Vec3b>(i, j).unwrap();
                temp[0] = 0;
                temp[1] = 255; // green
                temp[2] = 0;
            }
        }
    }
    let last_row_norm = last_row_id as f32 / 512.0;

    let mut segmented_image = Mat::default();
    let mut origin_image_8u = Mat::default();

    origin_image
        .convert_to(&mut origin_image_8u, CV_8UC3, 1.0, 0.0)
        .unwrap();

    opencv::core::add_weighted(
        &origin_image_8u,
        1.0,
        &mask0,
        0.5,
        0.0,
        &mut segmented_image,
        -1,
    )
    .unwrap();
    opencv::core::add_weighted(
        &origin_image_8u,
        1.0,
        &mask1,
        0.5,
        0.0,
        &mut segmented_image,
        -1,
    )
    .unwrap();

    opencv::imgproc::resize(
        &segmented_image,
        &mut origin_image_8u,
        core::Size_ {
            width: 1080,
            height: 1920,
        },
        0.0,
        0.0,
        opencv::imgproc::INTER_AREA,
    )
    .unwrap();

    let v = core::Vector::new();

    match imwrite("mask0.jpg", &mask0, &v) {
        Ok(value) => {
            if value == false {
                println!("Failed to save the image");
            } else {
                println!("Saved the image successfully");
            }
        }
        Err(e) => {
            println!("Failed to save the image: {}", e);
        }
    };

    return last_row_norm;
}

References

CSDN: Understanding OpenCV3 channels and bit depth and the corresponding Vec types (depth' is 1 (cv_8s))

GitHub C++:Text-Detection/main.cpp at main · Kurmangozhin/Text-Detection (github.com)

GitHub C++:Easy_Unet/main.cpp at master · dgl547437235/Easy_Unet (github.com)

GitHub Rust at_2d:Code search results (github.com)

GitHub Rust at_nd:Code search results (github.com)