Because of the pandemic, masks have become part of everyday life, and in some settings wearing one is mandatory, so detecting whether people are wearing masks has become a practical problem. In this post we will build a demo, based on a convolutional neural network, that detects whether a person is wearing a mask.
1. First, build the dataset. The directory structure is as follows:
Facialmask
├── 已佩戴口罩          (images of people wearing masks)
│   └── image files
├── 未佩戴口罩          (images of people without masks)
│   └── image files
└── image.csv
The image.csv file is generated by the dataset preprocessing script below:
# -*- coding: utf-8 -*-
# @Author: Phantom
# @Email: 2909981736@qq.com
# @Environment: Windows 10 + Python 3.8
# @IDE: PyCharm 2021.1.3
import csv
import glob
import os
import random

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)


def load_csv(root, filename, name2label):
    # Return the images and labels lists from the csv file.
    # root: dataset root directory, filename: csv file name, name2label: class-name encoding table
    if not os.path.exists(os.path.join(root, filename)):
        # The csv file does not exist yet, so create it
        images = []
        for name in name2label.keys():
            # Walk every class sub-directory and collect all images;
            # only png, jpg and jpeg files are considered, e.g. 'Facialmask\\已佩戴口罩\\00001.png'
            images += glob.glob(os.path.join(root, name, '*.png'))  # glob.glob() does pattern matching
            images += glob.glob(os.path.join(root, name, '*.jpg'))
            images += glob.glob(os.path.join(root, name, '*.jpeg'))
        # Print dataset info: number of images and their paths
        print(len(images), images)
        random.shuffle(images)  # shuffle the order randomly
        # Create the csv file and store each image path together with its label
        with open(os.path.join(root, filename), mode='w', newline='') as f:
            writer = csv.writer(f)
            for img in images:
                name = img.split(os.sep)[-2]  # second-to-last path element is the class folder name
                label = name2label[name]
                writer.writerow([img, label])
            print('written into csv file:', filename)

    # The csv file exists now, so read it directly
    images, labels = [], []
    with open(os.path.join(root, filename)) as f:
        reader = csv.reader(f)
        for row in reader:
            # each row looks like: 'Facialmask\\已佩戴口罩\\00000001.png', 0
            img, label = row
            label = int(label)
            images.append(img)
            labels.append(label)
    # Return the list of image paths and the list of labels
    return images, labels


def load_facialmask(root, mode='train'):
    # Build the numeric encoding table
    name2label = {}  # e.g. '已佩戴口罩': 0
    # Iterate over the sub-folders of the root directory in sorted order so the mapping stays fixed
    for name in sorted(os.listdir(os.path.join(root))):
        # Skip anything that is not a folder
        if not os.path.isdir(os.path.join(root, name)):
            continue
        # Assign each class a numeric code
        name2label[name] = len(name2label.keys())

    # Read the label information
    # [file1, file2, ...], [0, 1, ...]
    images, labels = load_csv(root, 'image.csv', name2label)
    if mode == 'train':  # first 60%
        images = images[:int(0.6 * len(images))]
        labels = labels[:int(0.6 * len(labels))]
    elif mode == 'val':  # 20%: 60% -> 80%
        images = images[int(0.6 * len(images)):int(0.8 * len(images))]
        labels = labels[int(0.6 * len(labels)):int(0.8 * len(labels))]
    else:  # 20%: 80% -> 100%
        images = images[int(0.8 * len(images)):]
        labels = labels[int(0.8 * len(labels)):]
    return images, labels, name2label


# mean and std are computed from real data; here the ImageNet statistics are used
img_mean = tf.constant([0.485, 0.456, 0.406])
img_std = tf.constant([0.229, 0.224, 0.225])


def normalize(x, mean=img_mean, std=img_std):
    # standardize
    x = (x - mean) / std
    return x


def denormalize(x, mean=img_mean, std=img_std):
    # inverse of the standardization
    x = x * std + mean
    return x


def preprocess(x, y):
    # x: image path, y: numeric label
    x = tf.io.read_file(x)                    # read the image from its path
    x = tf.image.decode_jpeg(x, channels=3)   # decode the image
    x = tf.image.resize(x, [244, 244])        # resize slightly larger than the crop size
    # data augmentation
    # x = tf.image.random_flip_up_down(x)     # vertical flip
    x = tf.image.random_flip_left_right(x)    # horizontal flip
    x = tf.image.random_crop(x, [224, 224, 3])
    x = tf.cast(x, dtype=tf.float32) / 255.
    # 0~1 => D(0,1) normalize
    x = normalize(x)                          # standardize
    y = tf.convert_to_tensor(y)
    return x, y


def main():
    import time
    # Load the mask dataset, here the training split
    images, labels, table = load_facialmask('Facialmask', 'train')
    print('images:', len(images), images)
    print('labels:', len(labels), labels)
    print('table:', table)
    # images: string paths
    # labels: numeric codes
    db = tf.data.Dataset.from_tensor_slices((images, labels))
    db = db.shuffle(1000).map(preprocess).batch(32)

    # Create a TensorBoard summary writer for visualization
    writer = tf.summary.create_file_writer('logs')
    for step, (x, y) in enumerate(db):
        # x: [32, 224, 224, 3]
        # y: [32]
        with writer.as_default():
            x = denormalize(x)  # undo the normalization so the images are viewable
            # write the image data
            tf.summary.image('img', x, step=step, max_outputs=9)
            time.sleep(3)


if __name__ == '__main__':
    main()
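To sanity-check the script, a quick test like the one below can be run. This is my own minimal sketch, not part of the original code; it assumes the script above is saved as image预处理.py (the module name the training script imports later) and that the Facialmask directory sits next to it:

from image预处理 import load_facialmask

for mode in ('train', 'val', 'test'):
    images, labels, name2label = load_facialmask('Facialmask', mode=mode)
    print(mode, len(images), 'images, class map:', name2label)

This should report the train/val/test splits in a 60/20/20 ratio and a class map like {'已佩戴口罩': 0, '未佩戴口罩': 1}, since the numeric codes follow the sorted folder names.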
2. Training: the dataset consists of 330 images of people wearing masks and 330 images of people without masks. We use the TensorFlow 2 framework; the network is the VGG19 model that ships with TF2 (Keras applications), with a fully connected layer added on top.
# -*- coding: utf-8 -*-
# @Author: Phantom
# @Email: 2909981736@qq.com
# @Environment: Windows 10 + Python 3.8
# @IDE: PyCharm 2021.1.3
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import cv2 as cv
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, Sequential
from tensorflow.keras.callbacks import EarlyStopping
from image预处理 import load_facialmask, normalize

tf.random.set_seed(1234)
np.random.seed(1234)

gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)


def preprocess(x, y):
    # x: image path, y: numeric label
    x = tf.io.read_file(x)                    # read the image from its path
    x = tf.image.decode_jpeg(x, channels=3)   # decode the image
    x = tf.image.resize(x, [244, 244])        # resize slightly larger than the crop size
    # data augmentation
    # x = tf.image.random_flip_up_down(x)     # vertical flip
    x = tf.image.random_flip_left_right(x)    # horizontal flip
    x = tf.image.random_crop(x, [224, 224, 3])
    x = tf.cast(x, dtype=tf.float32) / 255.
    # 0~1 => D(0,1) normalize
    x = normalize(x)                          # standardize
    y = tf.convert_to_tensor(y)
    y = tf.one_hot(y, depth=2)                # one-hot encode for categorical cross-entropy
    return x, y


batchsz = 128

images, labels, _ = load_facialmask('Facialmask', mode='train')
db_train = tf.data.Dataset.from_tensor_slices((images, labels))
db_train = db_train.map(preprocess).shuffle(500).batch(batchsz)

images2, labels2, _ = load_facialmask('Facialmask', mode='val')
db_val = tf.data.Dataset.from_tensor_slices((images2, labels2))
db_val = db_val.map(preprocess).batch(batchsz)

images3, labels3, _ = load_facialmask('Facialmask', mode='test')
db_test = tf.data.Dataset.from_tensor_slices((images3, labels3))
db_test = db_test.map(preprocess).batch(batchsz)

if not os.path.exists('facialmask.h5'):  # train only if no saved model exists yet
    # Load a pre-trained classic network as a frozen feature extractor
    net = keras.applications.VGG19(weights='imagenet', include_top=False, pooling='max')
    net.trainable = False
    newnet = Sequential([
        net,
        layers.Dense(2)  # 2 classes: mask / no mask
    ])
    newnet.build(input_shape=(None, 224, 224, 3))
    newnet.summary()
    # Monitor the chosen metric and stop training early
    early_stopping = EarlyStopping(
        monitor='val_accuracy',
        min_delta=0.001,
        patience=5  # stop after 5 consecutive epochs without a 0.001 improvement
    )
    newnet.compile(optimizer=optimizers.Adam(1e-3),
                   loss=tf.losses.CategoricalCrossentropy(from_logits=True),
                   metrics=['accuracy'])
    newnet.fit(db_train, epochs=100, validation_data=db_val, validation_freq=1,
               callbacks=[early_stopping])
    newnet.evaluate(db_test)
    newnet.save('facialmask.h5')
    print('saved total model.')
else:
    newnet = tf.keras.models.load_model('facialmask.h5')
    print('load model from file!')

# Single-image prediction
table = ['已佩戴口罩', '未佩戴口罩']
x = tf.io.read_file('2.png')                  # read the test image
img = cv.imread('2.png')
cv.imshow('3', img)                           # show the original image with OpenCV
x = tf.image.decode_jpeg(x, channels=3)       # decode the image
x = tf.image.resize(x, [224, 224])            # resize to the network input size
x = tf.cast(x, dtype=tf.float32) / 255.
# 0~1 => D(0,1) normalize
x = normalize(x)                              # standardize
x = tf.reshape(x, [1, 224, 224, 3])           # add the batch dimension
logits = newnet(x)
prob = tf.nn.softmax(logits, axis=1)
pred = tf.argmax(prob, axis=1)
pred = tf.cast(pred, dtype=tf.int32)
num = int(pred)
print(table[num])
cv.waitKey(0)
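If the prediction step is needed more than once (for example on individual camera frames), the single-image inference at the end of the script can be wrapped in a small helper. This is my own sketch rather than part of the original post; it only repackages the preprocessing and prediction steps shown above:

import tensorflow as tf
from image预处理 import normalize

table = ['已佩戴口罩', '未佩戴口罩']  # index -> class name, same order as the sorted class folders

def predict_mask(model, path):
    x = tf.io.read_file(path)
    x = tf.image.decode_jpeg(x, channels=3)   # decode to an RGB tensor
    x = tf.image.resize(x, [224, 224])        # match the training input size
    x = tf.cast(x, dtype=tf.float32) / 255.
    x = normalize(x)                          # same ImageNet normalization as in training
    x = tf.reshape(x, [1, 224, 224, 3])       # add the batch dimension
    logits = model(x)
    prob = tf.nn.softmax(logits, axis=1)
    return table[int(tf.argmax(prob, axis=1))], float(tf.reduce_max(prob))

# usage:
# newnet = tf.keras.models.load_model('facialmask.h5')
# label, confidence = predict_mask(newnet, '2.png')
# print(label, confidence)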
As for the training results: early stopping kicked in well before the 100-epoch limit, and the test-set accuracy reached 94%, which is a decent result.