学习如何训练自定义深度学习模型,通过Keras和TensorFlow来执行对象检测。
1、首先安装tensorflow 2.0,参考:机器学习笔记 - win10安装tensorflow-gpu.2.2 + Cuda10+cudnn7.6.5_bashendixie5的博客-CSDN博客https://blog.csdn.net/bashendixie5/article/details/110260615
2、提前下载vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5文件,放到C:\Users\【user】\.keras\models文件夹下
1、首先安装打标签的工具,此次使用的是labelImg,git官网GitHub - tzutalin/labelImg: 🖍️ LabelImg is a graphical image annotation tool and label object bounding boxes in imageshttps://github.com/tzutalin/labelImg,安装方式官网有详细说明,如果有什么包装不上,就下载到本地再装,运行起来如下界面。
2、标记文件,得到xml文件
3、xml转csv
- import os
- import glob
- import pandas as pd
- import xml.etree.ElementTree as ET
- def xml_to_csv(path):
- xml_list = []
- # 读取注释文件
- for xml_file in glob.glob(path + '/*.xml'):
- tree = ET.parse(xml_file)
- root = tree.getroot()
- for member in root.findall('object'):
- value = (root.find('filename').text,
- int(root.find('size')[0].text),
- int(root.find('size')[1].text),
- member[0].text,
- int(member[4][0].text),
- int(member[4][1].text),
- int(member[4][2].text),
- int(member[4][3].text)
- )
- xml_list.append(value)
- column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
- # 将所有数据分为样本集和验证集,一般按照3:1的比例
- train_list = xml_list[0: int(len(xml_list) * 0.67)]
- eval_list = xml_list[int(len(xml_list) * 0.67) + 1: ]
- # 保存为CSV格式
- train_df = pd.DataFrame(train_list, columns=column_name)
- eval_df = pd.DataFrame(eval_list, columns=column_name)
- train_df.to_csv('C:/Users/zxc/Desktop/labelImg/train_peaches.csv', index=None)
- eval_df.to_csv('C:/Users/zxc/Desktop/labelImg/eval_peaches.csv', index=None)
- def main():
- path = 'C:/Users/zxc/Desktop/labelImg'
- xml_to_csv(path)
- print('Successfully converted xml to csv.')
- main()
config.py
# config.py -- central place for dataset paths and training hyper-parameters.
import os


def _join(base, leaf):
    # Glue *leaf* onto *base* with the platform path separator
    # (equivalent to os.path.sep.join([base, leaf])).
    return base + os.path.sep + leaf


# Root folder of the labelled dataset.
BASE_PATH = "C:/Users/zxc/Desktop/labelImg"
# Images live directly in the base folder (note the trailing separator).
IMAGES_PATH = _join(BASE_PATH, "")
# Annotation CSV produced by the xml-to-csv conversion step.
ANNOTS_PATH = _join(BASE_PATH, "train_peaches.csv")

# Everything a training run produces goes under this folder.
BASE_OUTPUT = "C:/Users/zxc/Desktop/labelImg/output"
MODEL_PATH = _join(BASE_OUTPUT, "detector.h5")          # serialized model
PLOT_PATH = _join(BASE_OUTPUT, "plot.png")              # training-loss plot
TEST_FILENAMES = _join(BASE_OUTPUT, "test_images.txt")  # held-out image list

# Training hyper-parameters.
INIT_LR = 1e-4      # initial learning rate for the optimizer
NUM_EPOCHS = 25
BATCH_SIZE = 32
train.py
# train.py -- fine-tune a VGG16-based bounding-box regressor.
# import the necessary packages
import config
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os

# Load the annotation CSV written by the xml-to-csv step.  Its layout is
# filename,width,height,class,xmin,ymin,xmax,ymax with a header row, so the
# header is skipped and all eight columns are unpacked.  (The original code
# unpacked five values per row and never skipped the header, which raises a
# ValueError on this file's own CSV format.)
print("[INFO] loading dataset...")
rows = open(config.ANNOTS_PATH).read().strip().split("\n")[1:]

data = []       # preprocessed 224x224 images
targets = []    # bounding boxes, scaled to [0, 1]
filenames = []  # image file names matching `data`, used for the test split

# loop over the annotation rows
for row in rows:
    # break the row into the filename and bounding box coordinates;
    # width/height/class from the CSV are unused here (the real image size
    # is re-read below), hence the underscore names
    (filename, _width, _height, _label,
     startX, startY, endX, endY) = row.split(",")
    # load the image in OpenCV format to get its true pixel dimensions
    imagePath = os.path.sep.join([config.IMAGES_PATH, filename])
    image = cv2.imread(imagePath)
    (h, w) = image.shape[:2]
    # scale the bounding box to [0, 1] relative to the image size so the
    # network can regress resolution-independent coordinates
    startX = float(startX) / w
    startY = float(startY) / h
    endX = float(endX) / w
    endY = float(endY) / h
    # reload at the fixed VGG16 input resolution and collect the sample
    image = load_img(imagePath, target_size=(224, 224))
    image = img_to_array(image)
    data.append(image)
    targets.append((startX, startY, endX, endY))
    filenames.append(filename)

# convert to NumPy arrays, scaling pixels from [0, 255] to [0, 1]
data = np.array(data, dtype="float32") / 255.0
targets = np.array(targets, dtype="float32")

# 90/10 train/test split; filenames are split in lockstep so the held-out
# images can be identified later by predict.py
split = train_test_split(data, targets, filenames,
                         test_size=0.10, random_state=42)
(trainImages, testImages) = split[:2]
(trainTargets, testTargets) = split[2:4]
(trainFilenames, testFilenames) = split[4:]

# persist the test filenames so the regressor can be evaluated on unseen images
print("[INFO] saving testing filenames...")
with open(config.TEST_FILENAMES, "w") as f:
    f.write("\n".join(testFilenames))

# VGG16 backbone without its classifier head; the convolutional weights are
# frozen so only the regression head below is trained
vgg = VGG16(weights="imagenet", include_top=False,
            input_tensor=Input(shape=(224, 224, 3)))
vgg.trainable = False

# fully-connected regression head predicting the four box coordinates;
# sigmoid keeps the outputs inside [0, 1], matching the scaled targets
flatten = Flatten()(vgg.output)
bboxHead = Dense(128, activation="relu")(flatten)
bboxHead = Dense(64, activation="relu")(bboxHead)
bboxHead = Dense(32, activation="relu")(bboxHead)
bboxHead = Dense(4, activation="sigmoid")(bboxHead)
model = Model(inputs=vgg.input, outputs=bboxHead)

# `learning_rate` replaces the deprecated `lr` keyword of tf.keras Adam
opt = Adam(learning_rate=config.INIT_LR)
model.compile(loss="mse", optimizer=opt)
print(model.summary())

# train the network for bounding box regression
print("[INFO] training bounding box regressor...")
H = model.fit(
    trainImages, trainTargets,
    validation_data=(testImages, testTargets),
    batch_size=config.BATCH_SIZE,
    epochs=config.NUM_EPOCHS,
    verbose=1)

# serialize the model to disk
print("[INFO] saving object detector model...")
model.save(config.MODEL_PATH, save_format="h5")

# plot train/validation loss over the epochs
N = config.NUM_EPOCHS
plt.style.use("ggplot")
plt.figure()
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
plt.title("Bounding Box Regression Loss on Training Set")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.legend(loc="lower left")
plt.savefig(config.PLOT_PATH)
predict.py
# predict.py -- run the trained bounding-box regressor on one or more images.
# import the necessary packages
import config
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.models import load_model
import numpy as np
import mimetypes
import argparse
import imutils
import cv2
import os

# --input is either a single image or a text file listing image filenames
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", required=True,
                help="path to input image/text file of image filenames")
args = vars(ap.parse_args())

# assume a single image unless the input turns out to be a plain-text file
filetype = mimetypes.guess_type(args["input"])[0]
imagePaths = [args["input"]]
if "text/plain" == filetype:
    # one image filename per line (e.g. the test_images.txt written by
    # train.py), each resolved against the dataset image folder
    filenames = open(args["input"]).read().strip().split("\n")
    imagePaths = [os.path.sep.join([config.IMAGES_PATH, f])
                  for f in filenames]

# load our trained bounding box regressor from disk
print("[INFO] loading object detector...")
model = load_model(config.MODEL_PATH)

# loop over the images that we'll be testing
for imagePath in imagePaths:
    # preprocess exactly as in training: 224x224, pixels scaled to [0, 1],
    # plus a leading batch dimension
    image = load_img(imagePath, target_size=(224, 224))
    image = img_to_array(image) / 255.0
    image = np.expand_dims(image, axis=0)

    # the model outputs (startX, startY, endX, endY) scaled to [0, 1]
    preds = model.predict(image)[0]
    (startX, startY, endX, endY) = preds

    # reload with OpenCV at display width and map the box back to pixels
    image = cv2.imread(imagePath)
    image = imutils.resize(image, width=600)
    (h, w) = image.shape[:2]
    startX = int(startX * w)
    startY = int(startY * h)
    endX = int(endX * w)
    endY = int(endY * h)

    # draw the predicted bounding box and save one output file per input
    # image under the output folder (the original wrote every result to the
    # same hard-coded path, so only the last prediction survived; it also
    # called cv2.waitKey(0) without ever opening a window)
    cv2.rectangle(image, (startX, startY), (endX, endY), (0, 255, 0), 2)
    outputPath = os.path.sep.join(
        [config.BASE_OUTPUT, os.path.basename(imagePath)])
    cv2.imwrite(outputPath, image)
由于我只准备了几十张图片,而且不是有明显特征的,所以训练结果很差,就不上结果图了,啥都看不出来。训练完成得到h5文件,可以用于测试。
准备了246张猫咪的图片,对头部进行了重新标记,训练结果也是不太理想,应该还是数据集不够,质量一般。
从预测结果看,位置还是差很多,回头再仔细准备数据集测试看看。
联系客服