commit 4a2986bde5
    first

.gitignore (new file, vendored, 140 lines added)
@@ -0,0 +1,140 @@
# ignore map, miou, datasets
map_out/
miou_out/
VOCdevkit/
datasets/
Medical_Datasets/
lfw/
logs/
model_data/
.temp_map_out/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

Dataset_Partition.py (new file, 161 lines added)
@@ -0,0 +1,161 @@
import os
import random
import xml.etree.ElementTree as ET

import numpy as np

from utils.utils import get_classes

# --------------------------------------------------------------------------------------------------------------------------------#
#   annotation_mode specifies what this script computes when run.
#   annotation_mode = 0: the full annotation pipeline, i.e. the txt files in VOCdevkit/VOC2007/ImageSets
#                        plus the 2007_train.txt and 2007_val.txt used for training.
#   annotation_mode = 1: only the txt files in VOCdevkit/VOC2007/ImageSets.
#   annotation_mode = 2: only the 2007_train.txt and 2007_val.txt used for training.
# --------------------------------------------------------------------------------------------------------------------------------#
annotation_mode = 0
# -------------------------------------------------------------------#
#   Must be modified: used to generate the object information in
#   2007_train.txt and 2007_val.txt.
#   It should match the classes_path used for training and prediction.
#   If the generated 2007_train.txt contains no object information,
#   the classes were not set correctly.
#   Only takes effect when annotation_mode is 0 or 2.
# -------------------------------------------------------------------#
classes_path = 'model_data/voc_classes.txt'
# --------------------------------------------------------------------------------------------------------------------------------#
#   trainval_percent sets the ratio of (train + val) to test; by default (train + val) : test = 9 : 1.
#   train_percent sets the ratio of train to val within (train + val); by default train : val = 9 : 1.
#   Only takes effect when annotation_mode is 0 or 1.
# --------------------------------------------------------------------------------------------------------------------------------#
trainval_percent = 0.9
train_percent = 0.9
# -------------------------------------------------------#
#   Points to the folder containing the VOC dataset;
#   defaults to the VOC dataset in the root directory.
# -------------------------------------------------------#
VOCdevkit_path = 'VOCdevkit'

VOCdevkit_sets = [('2007', 'train'), ('2007', 'val')]
classes, _ = get_classes(classes_path)

# -------------------------------------------------------#
#   Object counts
# -------------------------------------------------------#
photo_nums = np.zeros(len(VOCdevkit_sets))  # number of train images, number of val images
nums = np.zeros(len(classes))


def convert_annotation(year, image_id, list_file):
    in_file = open(os.path.join(VOCdevkit_path, 'VOC%s/Annotations/%s.xml' % (year, image_id)), encoding='utf-8')
    tree = ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        difficult = 0
        if obj.find('difficult') is not None:
            difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)),
             int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

        nums[classes.index(cls)] = nums[classes.index(cls)] + 1  # count instances per class


if __name__ == "__main__":
    random.seed(0)
    if " " in os.path.abspath(VOCdevkit_path):
        raise ValueError("The dataset folder path and image file names must not contain spaces, "
                         "otherwise training will not work correctly. Please rename them.")

    if annotation_mode == 0 or annotation_mode == 1:
        print("Generate txt in ImageSets.")
        xmlfilepath = os.path.join(VOCdevkit_path, 'VOC2007/Annotations')
        saveBasePath = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets')
        temp_xml = os.listdir(xmlfilepath)
        total_xml = []
        for xml in temp_xml:
            if xml.endswith(".xml"):
                total_xml.append(xml)

        num = len(total_xml)
        list = range(num)
        tv = int(num * trainval_percent)  # total size of train + val
        tr = int(tv * train_percent)  # size of the train split within train + val
        trainval = random.sample(list, tv)  # sample train + val indices from all indices
        train = random.sample(trainval, tr)  # sample tr train indices from trainval

        print("train and val size", tv)
        print("train size", tr)
        ftrainval = open(os.path.join(saveBasePath, 'trainval.txt'), 'w')
        ftest = open(os.path.join(saveBasePath, 'test.txt'), 'w')
        ftrain = open(os.path.join(saveBasePath, 'train.txt'), 'w')
        fval = open(os.path.join(saveBasePath, 'val.txt'), 'w')

        for i in list:
            name = total_xml[i][:-4] + '\n'
            if i in trainval:
                ftrainval.write(name)
                if i in train:
                    ftrain.write(name)
                else:
                    fval.write(name)
            else:
                ftest.write(name)

        ftrainval.close()
        ftrain.close()
        fval.close()
        ftest.close()
        print("Generate txt in ImageSets done.")

    if annotation_mode == 0 or annotation_mode == 2:
        print("Generate 2007_train.txt and 2007_val.txt for train.")
        type_index = 0
        for year, image_set in VOCdevkit_sets:
            image_ids = open(os.path.join(VOCdevkit_path, 'VOC%s/ImageSets/Main/%s.txt' % (year, image_set)),
                             encoding='utf-8').read().strip().split()
            list_file = open('%s_%s.txt' % (year, image_set), 'w', encoding='utf-8')
            for image_id in image_ids:
                list_file.write(
                    '%s/VOC%s/JPEGImages/%s.jpg' % (os.path.abspath(VOCdevkit_path), year, image_id))  # the file path is assembled from its parts

                convert_annotation(year, image_id, list_file)
                list_file.write('\n')
            photo_nums[type_index] = len(image_ids)
            type_index += 1
            list_file.close()
        print("Generate 2007_train.txt and 2007_val.txt for train done.")


        def printTable(List1, List2):
            for i in range(len(List1[0])):
                print("|", end=' ')
                for j in range(len(List1)):
                    print(List1[j][i].rjust(int(List2[j])), end=' ')
                    print("|", end=' ')
                print()


        str_nums = [str(int(x)) for x in nums]
        tableData = [
            classes, str_nums
        ]
        colWidths = [0] * len(tableData)
        for i in range(len(tableData)):
            for j in range(len(tableData[i])):
                if len(tableData[i][j]) > colWidths[i]:
                    colWidths[i] = len(tableData[i][j])
        printTable(tableData, colWidths)

        if photo_nums[0] <= 500:
            print("The training set has fewer than 500 images, which is quite small; "
                  "consider training for more epochs so there are enough gradient-descent steps.")

        if np.sum(nums) == 0:
            print("No objects were found in the dataset. Make sure classes_path matches your own dataset "
                  "and that the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset. Make sure classes_path matches your own dataset "
                  "and that the label names are correct, otherwise training will have no effect!")
            print("No objects were found in the dataset. Make sure classes_path matches your own dataset "
                  "and that the label names are correct, otherwise training will have no effect!")
            print("(Important things are said three times.)")

LICENSE (new file, 21 lines added)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2020 JiaQi Xu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

ad_train.py (new file, 46 lines added)
@@ -0,0 +1,46 @@
from torch import optim


class BaseConfig(object):
    """
    Default parameters for all config files.
    """

    def __init__(self):
        """
        Set the defaults.
        """
        self.img_dir = "inria/Train/pos"
        self.lab_dir = "inria/Train/pos/yolo-labels"
        self.cfgfile = "cfg/yolo.cfg"
        self.weightfile = "weights/yolo.weights"
        self.printfile = "non_printability/30values.txt"
        self.patch_size = 300

        self.start_learning_rate = 0.03

        self.patch_name = 'base'

        self.scheduler_factory = lambda x: optim.lr_scheduler.ReduceLROnPlateau(x, 'min', patience=50)
        self.max_tv = 0

        self.batch_size = 20

        self.loss_target = lambda obj, cls: obj * cls


class ReproducePaperObj(BaseConfig):
    """
    Reproduce the results from the paper: generate a patch that minimises object score.
    """

    def __init__(self):
        super().__init__()

        self.batch_size = 8
        self.patch_size = 300

        self.patch_name = 'ObjectOnlyPaper'
        self.max_tv = 0.165

        self.loss_target = lambda obj, cls: obj
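
Note: a hedged sketch of how a training script might consume these config objects; the registry dict and the dummy optimizer below are illustrative assumptions, not part of this commit:

# Not part of the commit: pick a config and build the scheduler it describes.
import torch
from torch import optim

patch_configs = {"base": BaseConfig, "paper_obj": ReproducePaperObj}  # hypothetical registry

config = patch_configs["paper_obj"]()
print(config.patch_name, config.batch_size, config.max_tv)  # ObjectOnlyPaper 8 0.165

# scheduler_factory expects an optimizer; here a dummy one with a single parameter.
optimizer = optim.Adam([torch.zeros(1, requires_grad=True)], lr=config.start_learning_rate)
scheduler = config.scheduler_factory(optimizer)  # ReduceLROnPlateau(patience=50)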

get_map.py (new file, 138 lines added)
@@ -0,0 +1,138 @@
import os
import xml.etree.ElementTree as ET

from PIL import Image
from tqdm import tqdm

from utils.utils import get_classes
from utils.utils_map import get_coco_map, get_map
from yolo import YOLO

if __name__ == "__main__":
    '''
    Unlike AP, Recall and Precision are not area-based metrics, so their values differ at different
    confidence thresholds. By default, the Recall and Precision computed by this script correspond
    to a confidence threshold of 0.5.

    Because of how mAP is computed, the network must produce nearly all of its predicted boxes so
    that Recall and Precision can be evaluated at every threshold. The txt files in
    map_out/detection-results/ therefore usually contain more boxes than a plain predict run;
    the goal is to list every possible prediction.
    '''
    # ------------------------------------------------------------------------------------------------------------------#
    #   map_mode specifies what this script computes when run.
    #   map_mode = 0: the whole mAP pipeline: obtain predictions, obtain ground truth, compute VOC mAP.
    #   map_mode = 1: only obtain predictions.
    #   map_mode = 2: only obtain ground truth.
    #   map_mode = 3: only compute VOC mAP.
    #   map_mode = 4: compute the dataset's 0.50:0.95 mAP with the COCO toolbox; requires the
    #                 predictions, the ground truth, and an installed pycocotools.
    # -------------------------------------------------------------------------------------------------------------------#
    map_mode = 0
    # --------------------------------------------------------------------------------------#
    #   classes_path specifies the classes for which VOC mAP is measured.
    #   Normally it should match the classes_path used for training and prediction.
    # --------------------------------------------------------------------------------------#
    classes_path = 'model_data/voc_classes.txt'
    # --------------------------------------------------------------------------------------#
    #   MINOVERLAP specifies which mAP0.x to compute; e.g. for mAP0.75, set MINOVERLAP = 0.75.
    #
    #   A predicted box counts as a positive sample when its overlap with a ground-truth box is
    #   greater than MINOVERLAP, otherwise it is a negative sample. The larger MINOVERLAP, the
    #   more accurate a prediction must be to count as positive, and the lower the resulting mAP.
    # --------------------------------------------------------------------------------------#
    MINOVERLAP = 0.5
    # --------------------------------------------------------------------------------------#
    #   Because of how mAP is computed, the network must produce nearly all of its predicted
    #   boxes, so confidence should be set as low as possible to collect every candidate box.
    #
    #   This value is normally left untouched; since computing mAP needs nearly all predicted
    #   boxes, it must not be changed casually. To obtain Recall and Precision at other
    #   thresholds, modify score_threhold below.
    # --------------------------------------------------------------------------------------#
    confidence = 0.001
    # --------------------------------------------------------------------------------------#
    #   The IoU used for non-maximum suppression at prediction time; larger means less strict NMS.
    #
    #   This value is normally left untouched.
    # --------------------------------------------------------------------------------------#
    nms_iou = 0.5
    # ---------------------------------------------------------------------------------------------------------------#
    #   Unlike AP, Recall and Precision are not area-based, so their values differ at different thresholds.
    #
    #   By default, the Recall and Precision computed here correspond to a threshold of 0.5, defined here as
    #   score_threhold. Since computing mAP needs nearly all predicted boxes, the confidence defined above must
    #   not be changed casually; score_threhold is therefore a separate threshold at which the Recall and
    #   Precision values are read off during the mAP computation.
    # ---------------------------------------------------------------------------------------------------------------#
    score_threhold = 0.5
    # -------------------------------------------------------#
    #   map_vis toggles visualisation of the VOC mAP computation
    # -------------------------------------------------------#
    map_vis = False
    # -------------------------------------------------------#
    #   Points to the folder containing the VOC dataset;
    #   defaults to the VOC dataset in the root directory.
    # -------------------------------------------------------#
    VOCdevkit_path = 'VOCdevkit'
    # -------------------------------------------------------#
    #   Output folder for the results; defaults to map_out
    # -------------------------------------------------------#
    map_out_path = 'map_out'

    image_ids = open(os.path.join(VOCdevkit_path, "VOC2007/ImageSets/Main/test.txt")).read().strip().split()

    if not os.path.exists(map_out_path):
        os.makedirs(map_out_path)
    if not os.path.exists(os.path.join(map_out_path, 'ground-truth')):
        os.makedirs(os.path.join(map_out_path, 'ground-truth'))
    if not os.path.exists(os.path.join(map_out_path, 'detection-results')):
        os.makedirs(os.path.join(map_out_path, 'detection-results'))
    if not os.path.exists(os.path.join(map_out_path, 'images-optional')):
        os.makedirs(os.path.join(map_out_path, 'images-optional'))

    class_names, _ = get_classes(classes_path)

    if map_mode == 0 or map_mode == 1:
        print("Load model.")
        yolo = YOLO(confidence=confidence, nms_iou=nms_iou)
        print("Load model done.")

        print("Get predict result.")
        for image_id in tqdm(image_ids):
            image_path = os.path.join(VOCdevkit_path, "VOC2007/JPEGImages/" + image_id + ".jpg")
            image = Image.open(image_path)
            if map_vis:
                image.save(os.path.join(map_out_path, "images-optional/" + image_id + ".jpg"))
            yolo.get_map_txt(image_id, image, class_names, map_out_path)
        print("Get predict result done.")

    if map_mode == 0 or map_mode == 2:
        print("Get ground truth result.")
        for image_id in tqdm(image_ids):
            with open(os.path.join(map_out_path, "ground-truth/" + image_id + ".txt"), "w") as new_f:
                root = ET.parse(os.path.join(VOCdevkit_path, "VOC2007/Annotations/" + image_id + ".xml")).getroot()
                for obj in root.findall('object'):
                    difficult_flag = False
                    if obj.find('difficult') is not None:
                        difficult = obj.find('difficult').text
                        if int(difficult) == 1:
                            difficult_flag = True
                    obj_name = obj.find('name').text
                    if obj_name not in class_names:
                        continue
                    bndbox = obj.find('bndbox')
                    left = bndbox.find('xmin').text
                    top = bndbox.find('ymin').text
                    right = bndbox.find('xmax').text
                    bottom = bndbox.find('ymax').text

                    if difficult_flag:
                        new_f.write("%s %s %s %s %s difficult\n" % (obj_name, left, top, right, bottom))
                    else:
                        new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom))
        print("Get ground truth result done.")

    if map_mode == 0 or map_mode == 3:
        print("Get map.")
        get_map(MINOVERLAP, True, score_threhold=score_threhold, path=map_out_path)
        print("Get map done.")

    if map_mode == 4:
        print("Get map.")
        get_coco_map(class_names=class_names, path=map_out_path)
        print("Get map done.")

kmeans_for_anchors.py (new file, 167 lines added)
@@ -0,0 +1,167 @@
# -------------------------------------------------------------------------------------------------------#
#   Although k-means clusters the boxes in the dataset, many datasets contain boxes of similar size, so
#   the nine clustered anchors end up close together, which can actually hurt training: different feature
#   layers suit different anchor sizes, and the smaller a feature map's shape, the larger the anchors it
#   suits. The original network's anchors are already distributed across large/medium/small scales and
#   work very well without re-clustering.
# -------------------------------------------------------------------------------------------------------#
import glob
import xml.etree.ElementTree as ET

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm


def cas_iou(box, cluster):
    x = np.minimum(cluster[:, 0], box[0])
    y = np.minimum(cluster[:, 1], box[1])

    intersection = x * y
    area1 = box[0] * box[1]

    area2 = cluster[:, 0] * cluster[:, 1]
    iou = intersection / (area1 + area2 - intersection)

    return iou


def avg_iou(box, cluster):
    return np.mean([np.max(cas_iou(box[i], cluster)) for i in range(box.shape[0])])


def kmeans(box, k):
    # -------------------------------------------------------------#
    #   Total number of boxes
    # -------------------------------------------------------------#
    row = box.shape[0]

    # -------------------------------------------------------------#
    #   Distance from every box to every cluster centre
    # -------------------------------------------------------------#
    distance = np.empty((row, k))

    # -------------------------------------------------------------#
    #   Final cluster assignment of each box
    # -------------------------------------------------------------#
    last_clu = np.zeros((row,))

    np.random.seed()

    # -------------------------------------------------------------#
    #   Randomly pick k boxes as the initial cluster centres
    # -------------------------------------------------------------#
    cluster = box[np.random.choice(row, k, replace=False)]

    iter = 0
    while True:
        # -------------------------------------------------------------#
        #   Distance (1 - IoU) between each box and the current anchors
        # -------------------------------------------------------------#
        for i in range(row):
            distance[i] = 1 - cas_iou(box[i], cluster)

        # -------------------------------------------------------------#
        #   Assign each box to its nearest cluster
        # -------------------------------------------------------------#
        near = np.argmin(distance, axis=1)

        if (last_clu == near).all():
            break

        # -------------------------------------------------------------#
        #   Take the median point of each cluster
        # -------------------------------------------------------------#
        for j in range(k):
            cluster[j] = np.median(
                box[near == j], axis=0)

        last_clu = near
        if iter % 5 == 0:
            print('iter: {:d}. avg_iou:{:.2f}'.format(iter, avg_iou(box, cluster)))
        iter += 1

    return cluster, near


def load_data(path):
    data = []
    # -------------------------------------------------------------#
    #   Look for boxes in every xml
    # -------------------------------------------------------------#
    for xml_file in tqdm(glob.glob('{}/*xml'.format(path))):
        tree = ET.parse(xml_file)
        height = int(tree.findtext('./size/height'))
        width = int(tree.findtext('./size/width'))
        if height <= 0 or width <= 0:
            continue

        # -------------------------------------------------------------#
        #   Get the width and height of every object
        # -------------------------------------------------------------#
        for obj in tree.iter('object'):
            xmin = int(float(obj.findtext('bndbox/xmin'))) / width
            ymin = int(float(obj.findtext('bndbox/ymin'))) / height
            xmax = int(float(obj.findtext('bndbox/xmax'))) / width
            ymax = int(float(obj.findtext('bndbox/ymax'))) / height

            xmin = np.float64(xmin)
            ymin = np.float64(ymin)
            xmax = np.float64(xmax)
            ymax = np.float64(ymax)
            # width and height
            data.append([xmax - xmin, ymax - ymin])
    return np.array(data)


if __name__ == '__main__':
    np.random.seed(0)
    # -------------------------------------------------------------#
    #   Running this script processes the xml files in
    #   './VOCdevkit/VOC2007/Annotations' and generates yolo_anchors.txt.
    # -------------------------------------------------------------#
    input_shape = [416, 416]
    anchors_num = 9
    # -------------------------------------------------------------#
    #   Load the dataset; VOC xml files can be used
    # -------------------------------------------------------------#
    path = 'VOCdevkit/VOC2007/Annotations'

    # -------------------------------------------------------------#
    #   Load all xml files; boxes are stored as width,height
    #   normalised to the image size
    # -------------------------------------------------------------#
    print('Load xmls.')
    data = load_data(path)
    print('Load xmls done.')

    # -------------------------------------------------------------#
    #   Run the k-means clustering
    # -------------------------------------------------------------#
    print('K-means boxes.')
    cluster, near = kmeans(data, anchors_num)
    print('K-means boxes done.')
    data = data * np.array([input_shape[1], input_shape[0]])
    cluster = cluster * np.array([input_shape[1], input_shape[0]])

    # -------------------------------------------------------------#
    #   Plot the clusters
    # -------------------------------------------------------------#
    for j in range(anchors_num):
        plt.scatter(data[near == j][:, 0], data[near == j][:, 1])
        plt.scatter(cluster[j][0], cluster[j][1], marker='x', c='black')
    plt.savefig("kmeans_for_anchors.jpg")
    plt.show()
    print('Save kmeans_for_anchors.jpg in root dir.')

    cluster = cluster[np.argsort(cluster[:, 0] * cluster[:, 1])]
    print('avg_ratio:{:.2f}'.format(avg_iou(data, cluster)))
    print(cluster)

    f = open("yolo_anchors.txt", 'w')
    row = np.shape(cluster)[0]
    for i in range(row):
        if i == 0:
            x_y = "%d,%d" % (cluster[i][0], cluster[i][1])
        else:
            x_y = ", %d,%d" % (cluster[i][0], cluster[i][1])
        f.write(x_y)
    f.close()
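
Note: a quick sanity check of cas_iou under the convention used above (boxes and clusters are (w, h) pairs anchored at the origin, so the intersection is min(w) * min(h)); the values are made up and this snippet assumes cas_iou from kmeans_for_anchors.py is in scope:

# Not part of the commit: verify the width/height IoU.
import numpy as np

box = np.array([0.4, 0.6])
cluster = np.array([[0.4, 0.6], [0.2, 0.6]])
print(cas_iou(box, cluster))  # -> [1.0, 0.5]: identical box, then a half-width cluster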

load_data.py (new file, 531 lines added)
@@ -0,0 +1,531 @@
|  | import fnmatch | ||||||
|  | import math | ||||||
|  | import os | ||||||
|  | import sys | ||||||
|  | import time | ||||||
|  | from operator import itemgetter | ||||||
|  | 
 | ||||||
|  | import gc | ||||||
|  | import numpy as np | ||||||
|  | import torch | ||||||
|  | import torch.optim as optim | ||||||
|  | import torch.nn as nn | ||||||
|  | import torch.nn.functional as F | ||||||
|  | from PIL import Image | ||||||
|  | from torch.utils.data import Dataset | ||||||
|  | from torchvision import transforms | ||||||
|  | 
 | ||||||
|  | # from darknet import Darknet | ||||||
|  | 
 | ||||||
|  | from median_pool import MedianPool2d | ||||||
|  | 
 | ||||||
|  | # print('starting test read') | ||||||
|  | # im = Image.open('data/horse.jpg').convert('RGB') | ||||||
|  | # print('img read!') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class MaxProbExtractor(nn.Module): | ||||||
|  |     """MaxProbExtractor: extracts max class probability for class from YOLO output. | ||||||
|  | 
 | ||||||
|  |     Module providing the functionality necessary to extract the max class probability for one class from YOLO output. | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self, cls_id, num_cls, config): | ||||||
|  |         super(MaxProbExtractor, self).__init__() | ||||||
|  |         self.cls_id = cls_id | ||||||
|  |         self.num_cls = num_cls | ||||||
|  |         self.config = config | ||||||
|  |         self.anchor_num = 3 | ||||||
|  | 
 | ||||||
|  |     def forward(self, YOLOoutput): | ||||||
|  |         # get values neccesary for transformation | ||||||
|  |         if YOLOoutput.dim() == 3: | ||||||
|  |             YOLOoutput = YOLOoutput.unsqueeze(0) | ||||||
|  |         batch = YOLOoutput.size(0) | ||||||
|  |         assert (YOLOoutput.size(1) == (5 + self.num_cls) * self.anchor_num) | ||||||
|  |         h = YOLOoutput.size(2) | ||||||
|  |         w = YOLOoutput.size(3) | ||||||
|  |         # transform the output tensor from [batch, 425, 19, 19] to [batch, 80, 1805] | ||||||
|  |         output = YOLOoutput.view(batch, self.anchor_num, 5 + self.num_cls, h * w)  # [batch, 5, 85, 361] | ||||||
|  |         output = output.transpose(1, 2).contiguous()  # [batch, 85, 5, 361] | ||||||
|  |         output = output.view(batch, 5 + self.num_cls, self.anchor_num * h * w)  # [batch, 85, 1805] | ||||||
|  |         output_objectness = torch.sigmoid(output[:, 4, :])  # [batch, 1805]  # 是否有物体 | ||||||
|  |         output = output[:, 5:5 + self.num_cls, :]  # [batch, 80, 1805] | ||||||
|  |         # perform softmax to normalize probabilities for object classes to [0,1] | ||||||
|  |         normal_confs = torch.nn.Softmax(dim=1)(output)  # 物体类别 | ||||||
|  |         # we only care for probabilities of the class of interest (person) | ||||||
|  |         confs_for_class = normal_confs[:, self.cls_id, :]   # 类别 序号对应的为人 | ||||||
|  |         confs_if_object = output_objectness  # confs_for_class * output_objectness | ||||||
|  |         confs_if_object = confs_for_class * output_objectness | ||||||
|  |         confs_if_object = self.config.loss_target(output_objectness, confs_for_class) | ||||||
|  |         # find the max probability for person | ||||||
|  |         max_conf, max_conf_idx = torch.max(confs_if_object, dim=1) | ||||||
|  | 
 | ||||||
|  |         return max_conf | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class NPSCalculator(nn.Module): | ||||||
|  |     """NMSCalculator: calculates the non-printability score of a patch. | ||||||
|  | 
 | ||||||
|  |     Module providing the functionality necessary to calculate the non-printability score (NMS) of an adversarial patch. | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self, printability_file, patch_side): | ||||||
|  |         super(NPSCalculator, self).__init__() | ||||||
|  |         self.printability_array = nn.Parameter(self.get_printability_array(printability_file, patch_side), | ||||||
|  |                                                requires_grad=False) | ||||||
|  | 
 | ||||||
|  |     def forward(self, adv_patch): | ||||||
|  |         # calculate euclidian distance between colors in patch and colors in printability_array  | ||||||
|  |         # square root of sum of squared difference | ||||||
|  |         color_dist = (adv_patch - self.printability_array + 0.000001) | ||||||
|  |         color_dist = color_dist ** 2 | ||||||
|  |         color_dist = torch.sum(color_dist, 1) + 0.000001 | ||||||
|  |         color_dist = torch.sqrt(color_dist) | ||||||
|  |         # only work with the min distance | ||||||
|  |         color_dist_prod = torch.min(color_dist, 0)[0]  # test: change prod for min (find distance to closest color) | ||||||
|  |         # calculate the nps by summing over all pixels | ||||||
|  |         nps_score = torch.sum(color_dist_prod, 0) | ||||||
|  |         nps_score = torch.sum(nps_score, 0) | ||||||
|  |         return nps_score / torch.numel(adv_patch) | ||||||
|  | 
 | ||||||
|  |     def get_printability_array(self, printability_file, side): | ||||||
|  |         printability_list = [] | ||||||
|  | 
 | ||||||
|  |         # read in printability triplets and put them in a list | ||||||
|  |         with open(printability_file) as f: | ||||||
|  |             for line in f: | ||||||
|  |                 printability_list.append(line.split(",")) | ||||||
|  | 
 | ||||||
|  |         printability_array = [] | ||||||
|  |         for printability_triplet in printability_list: | ||||||
|  |             printability_imgs = [] | ||||||
|  |             red, green, blue = printability_triplet | ||||||
|  |             printability_imgs.append(np.full((side, side), red)) | ||||||
|  |             printability_imgs.append(np.full((side, side), green)) | ||||||
|  |             printability_imgs.append(np.full((side, side), blue)) | ||||||
|  |             printability_array.append(printability_imgs) | ||||||
|  | 
 | ||||||
|  |         printability_array = np.asarray(printability_array) | ||||||
|  |         printability_array = np.float32(printability_array) | ||||||
|  |         pa = torch.from_numpy(printability_array) | ||||||
|  |         return pa | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class TotalVariation(nn.Module): | ||||||
|  |     """TotalVariation: calculates the total variation of a patch. | ||||||
|  | 
 | ||||||
|  |     Module providing the functionality necessary to calculate the total Variation (TV) of an adversarial patch. | ||||||
|  | 
 | ||||||
|  |     TotalVariation:计算补丁的总变化。 | ||||||
|  |     该模块提供了计算对抗性补丁的总变化 (TV) 所需的功能。 | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self): | ||||||
|  |         super(TotalVariation, self).__init__() | ||||||
|  | 
 | ||||||
|  |     def forward(self, adv_patch): | ||||||
|  |         # bereken de total variation van de adv_patch | ||||||
|  |         tvcomp1 = torch.sum(torch.abs(adv_patch[:, :, 1:] - adv_patch[:, :, :-1] + 0.000001), 0) | ||||||
|  |         tvcomp1 = torch.sum(torch.sum(tvcomp1, 0), 0) | ||||||
|  |         tvcomp2 = torch.sum(torch.abs(adv_patch[:, 1:, :] - adv_patch[:, :-1, :] + 0.000001), 0) | ||||||
|  |         tvcomp2 = torch.sum(torch.sum(tvcomp2, 0), 0) | ||||||
|  |         tv = tvcomp1 + tvcomp2 | ||||||
|  |         return tv / torch.numel(adv_patch) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class PatchTransformer(nn.Module): | ||||||
|  |     """PatchTransformer: transforms batch of patches | ||||||
|  | 
 | ||||||
|  |     Module providing the functionality necessary to transform a batch of patches, randomly adjusting brightness and | ||||||
|  |     contrast, adding random amount of noise, and rotating randomly. Resizes-patches according to as size based on the | ||||||
|  |     batch of labels, and pads them to the dimension of an image. | ||||||
|  | 
 | ||||||
|  |     变换一批补丁,随机调整亮度和对比度,添加随机数量的噪声,随机旋转。 根据标签批次的大小调整补丁大小,并将它们填充到图像的尺寸中。 | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self): | ||||||
|  |         super(PatchTransformer, self).__init__() | ||||||
|  |         self.min_contrast = 0.8 | ||||||
|  |         self.max_contrast = 1.2 | ||||||
|  |         self.min_brightness = -0.1 | ||||||
|  |         self.max_brightness = 0.1 | ||||||
|  |         self.noise_factor = 0.10 | ||||||
|  |         self.minangle = -20 / 180 * math.pi | ||||||
|  |         self.maxangle = 20 / 180 * math.pi | ||||||
|  |         self.medianpooler = MedianPool2d(7, same=True)  # 中值池化 | ||||||
|  |         ''' | ||||||
|  |         kernel = torch.cuda.FloatTensor([[0.003765, 0.015019, 0.023792, 0.015019, 0.003765],                                                                                     | ||||||
|  |                                          [0.015019, 0.059912, 0.094907, 0.059912, 0.015019],                                                                                     | ||||||
|  |                                          [0.023792, 0.094907, 0.150342, 0.094907, 0.023792],                                                                                     | ||||||
|  |                                          [0.015019, 0.059912, 0.094907, 0.059912, 0.015019],                                                                                     | ||||||
|  |                                          [0.003765, 0.015019, 0.023792, 0.015019, 0.003765]]) | ||||||
|  |         self.kernel = kernel.unsqueeze(0).unsqueeze(0).expand(3,3,-1,-1) | ||||||
|  |         ''' | ||||||
|  | 
 | ||||||
|  |     def forward(self, adv_patch, lab_batch, img_size, do_rotate=True, rand_loc=True): | ||||||
|  |         # adv_patch = F.conv2d(adv_patch.unsqueeze(0),self.kernel,padding=(2,2)) | ||||||
|  |         adv_patch = self.medianpooler(adv_patch.unsqueeze(0)) | ||||||
|  |         # Determine size of padding | ||||||
|  |         pad = (img_size - adv_patch.size(-1)) / 2 | ||||||
|  |         # Make a batch of patches | ||||||
|  |         adv_patch = adv_patch.unsqueeze(0)  # .unsqueeze(0)  # 这里又扩大一维,变成5维  1, 1, 3, 300, 300 | ||||||
|  |         adv_batch = adv_patch.expand(lab_batch.size(0), lab_batch.size(1), -1, -1, -1)  # adv_batch !! 不是adv_patch!! 8, 14, 3, 300, 300 | ||||||
|  |         batch_size = torch.Size((lab_batch.size(0), lab_batch.size(1)))  # 8, 14 | ||||||
|  | 
 | ||||||
|  |         # Contrast, brightness and noise transforms | ||||||
|  | 
 | ||||||
|  |         # Create random contrast tensor | ||||||
|  |         contrast = torch.cuda.FloatTensor(batch_size).uniform_(self.min_contrast, self.max_contrast) | ||||||
|  |         contrast = contrast.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) | ||||||
|  |         contrast = contrast.expand(-1, -1, adv_batch.size(-3), adv_batch.size(-2), adv_batch.size(-1)) | ||||||
|  |         contrast = contrast.cuda() | ||||||
|  | 
 | ||||||
|  |         # Create random brightness tensor | ||||||
|  |         brightness = torch.cuda.FloatTensor(batch_size).uniform_(self.min_brightness, self.max_brightness) | ||||||
|  |         brightness = brightness.unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) | ||||||
|  |         brightness = brightness.expand(-1, -1, adv_batch.size(-3), adv_batch.size(-2), adv_batch.size(-1)) | ||||||
|  |         brightness = brightness.cuda() | ||||||
|  | 
 | ||||||
|  |         # Create random noise tensor | ||||||
|  |         noise = torch.cuda.FloatTensor(adv_batch.size()).uniform_(-1, 1) * self.noise_factor | ||||||
|  | 
 | ||||||
|  |         # Apply contrast/brightness/noise, clamp | ||||||
|  |         adv_batch = adv_batch * contrast + brightness + noise | ||||||
|  | 
 | ||||||
|  |         adv_batch = torch.clamp(adv_batch, 0.000001, 0.99999)  # 限制到0到1之间 | ||||||
|  | 
 | ||||||
|  |         # Where the label class_id is 1 we don't want a patch (padding) --> fill mask with zero's | ||||||
|  |         cls_ids = torch.narrow(lab_batch, 2, 0, 1)  # torch.narrow(input,dim,start,length)  从dim开始,返回共享内存的数据start到start+length-1 | ||||||
|  |         cls_mask = cls_ids.expand(-1, -1, 3)        # 接上,这里取出 lab_batch的代表id那列,相当于现在的lab_batch[..., 0] | ||||||
|  |         cls_mask = cls_mask.unsqueeze(-1) | ||||||
|  |         cls_mask = cls_mask.expand(-1, -1, -1, adv_batch.size(3)) | ||||||
|  |         cls_mask = cls_mask.unsqueeze(-1) | ||||||
|  |         cls_mask = cls_mask.expand(-1, -1, -1, -1, adv_batch.size(4))  # cls_mask 的大小是 8, 14, 3, 300, 300  数据是类别 | ||||||
|  |         msk_batch = torch.cuda.FloatTensor(cls_mask.size()).fill_(1) - cls_mask  # 这里取出有人所对应的msk | ||||||
|  | 
 | ||||||
|  |         # Pad patch and mask to image dimensions | ||||||
|  |         mypad = nn.ConstantPad2d((int(pad + 0.5), int(pad), int(pad + 0.5), int(pad)), 0)  # (padding_left、padding_right、padding_top、padding_bottom) 填充0 | ||||||
|  |         adv_batch = mypad(adv_batch)  # 用0填充到416 | ||||||
|  |         msk_batch = mypad(msk_batch) | ||||||
|  | 
 | ||||||
|  |         # Rotation and rescaling transforms | ||||||
|  |         anglesize = (lab_batch.size(0) * lab_batch.size(1))  # 这里是旋转的数量 | ||||||
|  |         if do_rotate: | ||||||
|  |             angle = torch.cuda.FloatTensor(anglesize).uniform_(self.minangle, self.maxangle) | ||||||
|  |         else: | ||||||
|  |             angle = torch.cuda.FloatTensor(anglesize).fill_(0) | ||||||
|  | 
 | ||||||
|  |         # Resizes and rotates | ||||||
|  |         current_patch_size = adv_patch.size(-1) | ||||||
|  |         lab_batch_scaled = torch.cuda.FloatTensor(lab_batch.size()).fill_(0)  # lab_batch_scaled是在原图上的尺寸? | ||||||
|  |         lab_batch_scaled[:, :, 1] = lab_batch[:, :, 1] * img_size | ||||||
|  |         lab_batch_scaled[:, :, 2] = lab_batch[:, :, 2] * img_size | ||||||
|  |         lab_batch_scaled[:, :, 3] = lab_batch[:, :, 3] * img_size | ||||||
|  |         lab_batch_scaled[:, :, 4] = lab_batch[:, :, 4] * img_size | ||||||
|  |         target_size = torch.sqrt( | ||||||
|  |             ((lab_batch_scaled[:, :, 3].mul(0.2)) ** 2) + ((lab_batch_scaled[:, :, 4].mul(0.2)) ** 2)) | ||||||
|  |         target_x = lab_batch[:, :, 1].view(np.prod(batch_size)) | ||||||
|  |         target_y = lab_batch[:, :, 2].view(np.prod(batch_size)) | ||||||
|  |         targetoff_x = lab_batch[:, :, 3].view(np.prod(batch_size)) | ||||||
|  |         targetoff_y = lab_batch[:, :, 4].view(np.prod(batch_size)) | ||||||
|  |         if (rand_loc): | ||||||
|  |             off_x = targetoff_x * (torch.cuda.FloatTensor(targetoff_x.size()).uniform_(-0.4, 0.4)) | ||||||
|  |             target_x = target_x + off_x | ||||||
|  |             off_y = targetoff_y * (torch.cuda.FloatTensor(targetoff_y.size()).uniform_(-0.4, 0.4)) | ||||||
|  |             target_y = target_y + off_y | ||||||
|  |         target_y = target_y - 0.05 | ||||||
|  |         scale = target_size / current_patch_size  # 原图相对于补丁大小的缩放因子? | ||||||
|  |         scale = scale.view(anglesize) | ||||||
|  | 
 | ||||||
|  |         s = adv_batch.size() | ||||||
|  |         adv_batch = adv_batch.view(s[0] * s[1], s[2], s[3], s[4]) | ||||||
|  |         msk_batch = msk_batch.view(s[0] * s[1], s[2], s[3], s[4]) | ||||||
|  | 
 | ||||||
|  |         tx = (-target_x + 0.5) * 2 | ||||||
|  |         ty = (-target_y + 0.5) * 2 | ||||||
|  |         sin = torch.sin(angle) | ||||||
|  |         cos = torch.cos(angle) | ||||||
|  | 
 | ||||||
|  |         # Theta = rotation,rescale matrix | ||||||
|  |         theta = torch.cuda.FloatTensor(anglesize, 2, 3).fill_(0) | ||||||
|  |         theta[:, 0, 0] = cos / scale | ||||||
|  |         theta[:, 0, 1] = sin / scale | ||||||
|  |         theta[:, 0, 2] = tx * cos / scale + ty * sin / scale | ||||||
|  |         theta[:, 1, 0] = -sin / scale | ||||||
|  |         theta[:, 1, 1] = cos / scale | ||||||
|  |         theta[:, 1, 2] = -tx * sin / scale + ty * cos / scale | ||||||
|  | 
 | ||||||
|  |         b_sh = adv_batch.shape | ||||||
|  |         grid = F.affine_grid(theta, adv_batch.shape) | ||||||
|  | 
 | ||||||
|  |         adv_batch_t = F.grid_sample(adv_batch, grid) | ||||||
|  |         msk_batch_t = F.grid_sample(msk_batch, grid) | ||||||
|  | 
 | ||||||
|  |         ''' | ||||||
|  |         # Theta2 = translation matrix | ||||||
|  |         theta2 = torch.cuda.FloatTensor(anglesize, 2, 3).fill_(0) | ||||||
|  |         theta2[:, 0, 0] = 1 | ||||||
|  |         theta2[:, 0, 1] = 0 | ||||||
|  |         theta2[:, 0, 2] = (-target_x + 0.5) * 2 | ||||||
|  |         theta2[:, 1, 0] = 0 | ||||||
|  |         theta2[:, 1, 1] = 1 | ||||||
|  |         theta2[:, 1, 2] = (-target_y + 0.5) * 2 | ||||||
|  | 
 | ||||||
|  |         grid2 = F.affine_grid(theta2, adv_batch.shape) | ||||||
|  |         adv_batch_t = F.grid_sample(adv_batch_t, grid2) | ||||||
|  |         msk_batch_t = F.grid_sample(msk_batch_t, grid2) | ||||||
|  | 
 | ||||||
|  |         ''' | ||||||
|  |         adv_batch_t = adv_batch_t.view(s[0], s[1], s[2], s[3], s[4]) | ||||||
|  |         msk_batch_t = msk_batch_t.view(s[0], s[1], s[2], s[3], s[4]) | ||||||
|  | 
 | ||||||
|  |         adv_batch_t = torch.clamp(adv_batch_t, 0.000001, 0.999999) | ||||||
|  |         # img = msk_batch_t[0, 0, :, :, :].detach().cpu() | ||||||
|  |         # img = transforms.ToPILImage()(img) | ||||||
|  |         # img.show() | ||||||
|  |         # exit() | ||||||
|  | 
 | ||||||
|  |         return adv_batch_t * msk_batch_t | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class PatchApplier(nn.Module): | ||||||
|  |     """PatchApplier: applies adversarial patches to images. | ||||||
|  | 
 | ||||||
|  |     Module providing the functionality necessary to apply a patch to all detections in all images in the batch. | ||||||
|  | 
 | ||||||
|  |     PatchApplier:对图像应用对抗补丁。 | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self): | ||||||
|  |         super(PatchApplier, self).__init__() | ||||||
|  | 
 | ||||||
|  |     def forward(self, img_batch, adv_batch): | ||||||
|  |         advs = torch.unbind(adv_batch, 1)  # 沿1维解开 | ||||||
|  |         for adv in advs: | ||||||
|  |             img_batch = torch.where((adv == 0), img_batch, adv)  # 对图像相应的坐标位置替换其像素?好像还没到图像的环节 | ||||||
|  |         return img_batch | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | ''' | ||||||
|  | class PatchGenerator(nn.Module): | ||||||
|  |     """PatchGenerator: network module that generates adversarial patches. | ||||||
|  | 
 | ||||||
|  |     Module representing the neural network that will generate adversarial patches. | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self, cfgfile, weightfile, img_dir, lab_dir): | ||||||
|  |         super(PatchGenerator, self).__init__() | ||||||
|  |         self.yolo = Darknet(cfgfile).load_weights(weightfile) | ||||||
|  |         self.dataloader = torch.utils.data.DataLoader(InriaDataset(img_dir, lab_dir, shuffle=True), | ||||||
|  |                                                       batch_size=5, | ||||||
|  |                                                       shuffle=True) | ||||||
|  |         self.patchapplier = PatchApplier() | ||||||
|  |         self.nmscalculator = NMSCalculator() | ||||||
|  |         self.totalvariation = TotalVariation() | ||||||
|  | 
 | ||||||
|  |     def forward(self, *input): | ||||||
|  |         pass | ||||||
|  | ''' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class InriaDataset(Dataset): | ||||||
|  |     """InriaDataset: representation of the INRIA person dataset. | ||||||
|  | 
 | ||||||
|  |     Internal representation of the commonly used INRIA person dataset. | ||||||
|  |     Available at: http://pascal.inrialpes.fr/data/human/ | ||||||
|  | 
 | ||||||
|  |     Attributes: | ||||||
|  |         len: An integer number of elements in the | ||||||
|  |         img_dir: Directory containing the images of the INRIA dataset. | ||||||
|  |         lab_dir: Directory containing the labels of the INRIA dataset. | ||||||
|  |         img_names: List of all image file names in img_dir. | ||||||
|  |         shuffle: Whether or not to shuffle the dataset. | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self, img_dir, lab_dir, max_lab, imgsize, shuffle=True): | ||||||
|  |         n_png_images = len(fnmatch.filter(os.listdir(img_dir), '*.png'))  # 614  fnmatch.filter返回一个list | ||||||
|  |         n_jpg_images = len(fnmatch.filter(os.listdir(img_dir), '*.jpg'))  # 0 | ||||||
|  |         n_images = n_png_images + n_jpg_images  # 图像的总数 | ||||||
|  |         n_labels = len(fnmatch.filter(os.listdir(lab_dir), '*.txt')) | ||||||
|  |         assert n_images == n_labels, "Number of images and number of labels don't match" | ||||||
|  |         self.len = n_images | ||||||
|  |         self.img_dir = img_dir | ||||||
|  |         self.lab_dir = lab_dir | ||||||
|  |         self.imgsize = imgsize | ||||||
|  |         self.img_names = fnmatch.filter(os.listdir(img_dir), '*.png') + fnmatch.filter(os.listdir(img_dir), '*.jpg') | ||||||
|  |         self.shuffle = shuffle | ||||||
|  |         self.img_paths = [] | ||||||
|  |         for img_name in self.img_names: | ||||||
|  |             self.img_paths.append(os.path.join(self.img_dir, img_name)) | ||||||
|  |         self.lab_paths = [] | ||||||
|  |         for img_name in self.img_names: | ||||||
|  |             lab_path = os.path.join(self.lab_dir, img_name).replace('.jpg', '.txt').replace('.png', '.txt') | ||||||
|  |             self.lab_paths.append(lab_path) | ||||||
|  |         self.max_n_labels = max_lab  # maximum number of labels kept per image | ||||||
|  | 
 | ||||||
|  |     def __len__(self): | ||||||
|  |         return self.len | ||||||
|  | 
 | ||||||
|  |     def __getitem__(self, idx): | ||||||
|  |         assert idx < len(self), 'index range error' | ||||||
|  |         img_path = os.path.join(self.img_dir, self.img_names[idx]) | ||||||
|  |         lab_path = os.path.join(self.lab_dir, self.img_names[idx]).replace('.jpg', '.txt').replace('.png', '.txt') | ||||||
|  |         image = Image.open(img_path).convert('RGB') | ||||||
|  |         if os.path.getsize(lab_path):  # check to see if label file contains data. | ||||||
|  |             label = np.loadtxt(lab_path) | ||||||
|  |         else: | ||||||
|  |             label = np.ones([5]) | ||||||
|  | 
 | ||||||
|  |         label = torch.from_numpy(label).float() | ||||||
|  |         if label.dim() == 1: | ||||||
|  |             label = label.unsqueeze(0) | ||||||
|  | 
 | ||||||
|  |         image, label = self.pad_and_scale(image, label) | ||||||
|  |         transform = transforms.ToTensor() | ||||||
|  |         image = transform(image) | ||||||
|  |         label = self.pad_lab(label) | ||||||
|  |         # print("image size :", image.shape) | ||||||
|  |         # print("label size :", label.shape) | ||||||
|  |         return image, label | ||||||
|  | 
 | ||||||
|  |     def pad_and_scale(self, img, lab): | ||||||
|  |         """Pad the image to a square with grey borders and rescale the labels to match. | ||||||
|  | 
 | ||||||
|  |         Args: | ||||||
|  |             img: PIL image to pad and resize. | ||||||
|  |             lab: Tensor of YOLO labels (class, x, y, w, h), normalized to [0, 1]. | ||||||
|  | 
 | ||||||
|  |         Returns: | ||||||
|  |             The padded, resized image and the adjusted labels. | ||||||
|  | 
 | ||||||
|  |         """ | ||||||
|  |         w, h = img.size | ||||||
|  |         if w == h: | ||||||
|  |             padded_img = img | ||||||
|  |         else: | ||||||
|  |             dim_to_pad = 1 if w < h else 2 | ||||||
|  |             if dim_to_pad == 1: | ||||||
|  |                 padding = (h - w) / 2 | ||||||
|  |                 padded_img = Image.new('RGB', (h, h), color=(127, 127, 127)) | ||||||
|  |                 padded_img.paste(img, (int(padding), 0)) | ||||||
|  |                 lab[:, [1]] = (lab[:, [1]] * w + padding) / h | ||||||
|  |                 lab[:, [3]] = (lab[:, [3]] * w / h) | ||||||
|  |             else: | ||||||
|  |                 padding = (w - h) / 2 | ||||||
|  |                 padded_img = Image.new('RGB', (w, w), color=(127, 127, 127)) | ||||||
|  |                 padded_img.paste(img, (0, int(padding))) | ||||||
|  |                 lab[:, [2]] = (lab[:, [2]] * h + padding) / w | ||||||
|  |                 lab[:, [4]] = (lab[:, [4]] * h / w) | ||||||
|  |         resize = transforms.Resize((self.imgsize, self.imgsize)) | ||||||
|  |         padded_img = resize(padded_img)  # resize the padded square to the network input size | ||||||
|  |         return padded_img, lab | ||||||
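Worked through on a hypothetical 100x200 portrait image: it is pasted into a 200x200 grey square with a 50-pixel horizontal offset, so a normalized x-center of 0.5 stays at 0.5 while normalized widths shrink by w/h:

    w, h = 100, 200
    padding = (h - w) / 2               # 50 pixels of padding on the left
    x_center = (0.5 * w + padding) / h  # 0.5 -> 0.5 after re-normalizing to the square
    box_w = 0.8 * w / h                 # 0.8 -> 0.4: widths scale by w/h
    print(x_center, box_w)              # 0.5 0.4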
|  | 
 | ||||||
|  |     def pad_lab(self, lab): | ||||||
|  |         pad_size = self.max_n_labels - lab.shape[0] | ||||||
|  |         if (pad_size > 0): | ||||||
|  |             padded_lab = F.pad(lab, (0, 0, 0, pad_size), value=1)  # F.pad order: (left, right, top, bottom) | ||||||
|  |         else: | ||||||
|  |             padded_lab = lab | ||||||
|  |         return padded_lab | ||||||
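For illustration (with a hypothetical maximum of 5 labels), pad_lab extends a 2-row label tensor with rows of ones:

    import torch
    import torch.nn.functional as F

    lab = torch.zeros(2, 5)                                    # two real labels
    padded = F.pad(lab, (0, 0, 0, 5 - lab.shape[0]), value=1)  # pad rows below with ones
    print(padded.shape)                                        # torch.Size([5, 5]); rows 2-4 are all ones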
|  | 
 | ||||||
|  | 
 | ||||||
|  | if __name__ == '__main__': | ||||||
|  |     if len(sys.argv) == 3: | ||||||
|  |         img_dir = sys.argv[1] | ||||||
|  |         lab_dir = sys.argv[2] | ||||||
|  | 
 | ||||||
|  |     else: | ||||||
|  |         print('Usage: ') | ||||||
|  |         print('  python load_data.py img_dir lab_dir') | ||||||
|  |         sys.exit() | ||||||
|  | 
 | ||||||
|  |     test_loader = torch.utils.data.DataLoader(InriaDataset(img_dir, lab_dir, max_lab=14, imgsize=416, shuffle=True),  # max_lab/imgsize: assumed values; the constructor requires them | ||||||
|  |                                               batch_size=3, shuffle=True) | ||||||
|  | 
 | ||||||
|  |     cfgfile = "cfg/yolov2.cfg" | ||||||
|  |     weightfile = "weights/yolov2.weights" | ||||||
|  |     printfile = "non_printability/30values.txt" | ||||||
|  | 
 | ||||||
|  |     patch_size = 400 | ||||||
|  | 
 | ||||||
|  |     darknet_model = Darknet(cfgfile) | ||||||
|  |     darknet_model.load_weights(weightfile) | ||||||
|  |     darknet_model = darknet_model.cuda() | ||||||
|  |     patch_applier = PatchApplier().cuda() | ||||||
|  |     patch_transformer = PatchTransformer().cuda() | ||||||
|  |     prob_extractor = MaxProbExtractor(0, 80).cuda() | ||||||
|  |     nms_calculator = NMSCalculator(printfile, patch_size) | ||||||
|  |     total_variation = TotalVariation() | ||||||
|  |     ''' | ||||||
|  |     img = Image.open('data/horse.jpg').convert('RGB') | ||||||
|  |     img = img.resize((darknet_model.width, darknet_model.height)) | ||||||
|  |     width = img.width | ||||||
|  |     height = img.height | ||||||
|  |     img = torch.ByteTensor(torch.ByteStorage.from_buffer(img.tobytes())) | ||||||
|  |     img = img.view(height, width, 3).transpose(0, 1).transpose(0, 2).contiguous() | ||||||
|  |     img = img.view(1, 3, height, width) | ||||||
|  |     img = img.float().div(255.0) | ||||||
|  |     img = torch.autograd.Variable(img) | ||||||
|  | 
 | ||||||
|  |     output = darknet_model(img) | ||||||
|  |     ''' | ||||||
|  |     optimizer = torch.optim.Adam(darknet_model.parameters(), lr=0.0001) | ||||||
|  | 
 | ||||||
|  |     tl0 = time.time() | ||||||
|  |     tl1 = time.time() | ||||||
|  |     for i_batch, (img_batch, lab_batch) in enumerate(test_loader): | ||||||
|  |         tl1 = time.time() | ||||||
|  |         print('time to fetch items: ', tl1 - tl0) | ||||||
|  |         img_batch = img_batch.cuda() | ||||||
|  |         lab_batch = lab_batch.cuda() | ||||||
|  |         adv_patch = Image.open('data/horse.jpg').convert('RGB') | ||||||
|  |         adv_patch = adv_patch.resize((patch_size, patch_size)) | ||||||
|  |         transform = transforms.ToTensor() | ||||||
|  |         adv_patch = transform(adv_patch).cuda() | ||||||
|  |         img_size = img_batch.size(-1) | ||||||
|  |         print('transforming patches') | ||||||
|  |         t0 = time.time() | ||||||
|  |         adv_batch_t = patch_transformer(adv_patch, lab_batch, img_size) | ||||||
|  |         print('applying patches') | ||||||
|  |         t1 = time.time() | ||||||
|  |         img_batch = patch_applier(img_batch, adv_batch_t) | ||||||
|  |         img_batch = torch.autograd.Variable(img_batch) | ||||||
|  |         img_batch = F.interpolate(img_batch, (darknet_model.height, darknet_model.width)) | ||||||
|  |         print('running patched images through model') | ||||||
|  |         t2 = time.time() | ||||||
|  | 
 | ||||||
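|  |         # debug: walk the garbage collector and print every live tensor, to track down GPU memory usage | ||||||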
|  |         for obj in gc.get_objects(): | ||||||
|  |             try: | ||||||
|  |                 if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): | ||||||
|  |                     try: | ||||||
|  |                         print(type(obj), obj.size()) | ||||||
|  |                     except: | ||||||
|  |                         pass | ||||||
|  |             except: | ||||||
|  |                 pass | ||||||
|  | 
 | ||||||
|  |         print(torch.cuda.memory_allocated()) | ||||||
|  | 
 | ||||||
|  |         output = darknet_model(img_batch) | ||||||
|  |         print('extracting max probs') | ||||||
|  |         t3 = time.time() | ||||||
|  |         max_prob = prob_extractor(output) | ||||||
|  |         t4 = time.time() | ||||||
|  |         nms = nms_calculator(adv_patch) | ||||||
|  |         tv = total_variation(adv_patch) | ||||||
|  |         print('---------------------------------') | ||||||
|  |         print('        patch transformation : %f' % (t1 - t0)) | ||||||
|  |         print('           patch application : %f' % (t2 - t1)) | ||||||
|  |         print('             darknet forward : %f' % (t3 - t2)) | ||||||
|  |         print('      probability extraction : %f' % (t4 - t3)) | ||||||
|  |         print('---------------------------------') | ||||||
|  |         print('          total forward pass : %f' % (t4 - t0)) | ||||||
|  |         del img_batch, lab_batch, adv_patch, adv_batch_t, output, max_prob | ||||||
|  |         torch.cuda.empty_cache() | ||||||
|  |         tl0 = time.time() | ||||||
							
								
								
									
50  median_pool.py  Normal file
							| @ -0,0 +1,50 @@ | |||||||
|  | import math | ||||||
|  | import torch | ||||||
|  | import torch.nn as nn | ||||||
|  | import torch.nn.functional as F | ||||||
|  | from torch.nn.modules.utils import _pair, _quadruple | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class MedianPool2d(nn.Module): | ||||||
|  |     """ Median pool (usable as median filter when stride=1) module. | ||||||
|  |      | ||||||
|  |     Args: | ||||||
|  |          kernel_size: size of pooling kernel, int or 2-tuple | ||||||
|  |          stride: pool stride, int or 2-tuple | ||||||
|  |          padding: pool padding, int or 4-tuple (l, r, t, b) as in pytorch F.pad | ||||||
|  |          same: override padding and enforce same padding, boolean | ||||||
|  |     """ | ||||||
|  |     def __init__(self, kernel_size=3, stride=1, padding=0, same=False): | ||||||
|  |         super(MedianPool2d, self).__init__() | ||||||
|  |         self.k = _pair(kernel_size) | ||||||
|  |         self.stride = _pair(stride) | ||||||
|  |         self.padding = _quadruple(padding)  # convert to l, r, t, b | ||||||
|  |         self.same = same | ||||||
|  | 
 | ||||||
|  |     def _padding(self, x): | ||||||
|  |         if self.same: | ||||||
|  |             ih, iw = x.size()[2:] | ||||||
|  |             if ih % self.stride[0] == 0: | ||||||
|  |                 ph = max(self.k[0] - self.stride[0], 0) | ||||||
|  |             else: | ||||||
|  |                 ph = max(self.k[0] - (ih % self.stride[0]), 0) | ||||||
|  |             if iw % self.stride[1] == 0: | ||||||
|  |                 pw = max(self.k[1] - self.stride[1], 0) | ||||||
|  |             else: | ||||||
|  |                 pw = max(self.k[1] - (iw % self.stride[1]), 0) | ||||||
|  |             pl = pw // 2 | ||||||
|  |             pr = pw - pl | ||||||
|  |             pt = ph // 2 | ||||||
|  |             pb = ph - pt | ||||||
|  |             padding = (pl, pr, pt, pb) | ||||||
|  |         else: | ||||||
|  |             padding = self.padding | ||||||
|  |         return padding | ||||||
|  |      | ||||||
|  |     def forward(self, x): | ||||||
|  |         # using existing pytorch functions and tensor ops so that we get autograd,  | ||||||
|  |         # would likely be more efficient to implement from scratch at C/Cuda level | ||||||
|  |         x = F.pad(x, self._padding(x), mode='reflect') | ||||||
|  |         x = x.unfold(2, self.k[0], self.stride[0]).unfold(3, self.k[1], self.stride[1]) | ||||||
|  |         x = x.contiguous().view(x.size()[:4] + (-1,)).median(dim=-1)[0] | ||||||
|  |         return x | ||||||
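A quick smoke test of the module (arbitrary input; with same=True the spatial size is preserved):

    import torch

    pool = MedianPool2d(kernel_size=3, stride=1, same=True)
    x = torch.rand(1, 3, 8, 8)
    print(pool(x).shape)  # torch.Size([1, 3, 8, 8])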
							
								
								
									
1  nets/__init__.py  Normal file
							| @ -0,0 +1 @@ | |||||||
|  | # | ||||||
							
								
								
									
101  nets/darknet.py  Normal file
							| @ -0,0 +1,101 @@ | |||||||
|  | import math | ||||||
|  | from collections import OrderedDict | ||||||
|  | 
 | ||||||
|  | import torch.nn as nn | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # ---------------------------------------------------------------------# | ||||||
|  | #   Residual block | ||||||
|  | #   A 1x1 convolution reduces the channel count, then a 3x3 convolution | ||||||
|  | #   extracts features and restores the channel count; a shortcut | ||||||
|  | #   connection is added at the end | ||||||
|  | # ---------------------------------------------------------------------# | ||||||
|  | class BasicBlock(nn.Module): | ||||||
|  |     def __init__(self, inplanes, planes): | ||||||
|  |         super(BasicBlock, self).__init__() | ||||||
|  |         self.conv1 = nn.Conv2d(inplanes, planes[0], kernel_size=1, stride=1, padding=0, bias=False)  # squeeze to fewer channels here; the 3x3 conv below expands back | ||||||
|  |         self.bn1 = nn.BatchNorm2d(planes[0]) | ||||||
|  |         self.relu1 = nn.LeakyReLU(0.1) | ||||||
|  | 
 | ||||||
|  |         self.conv2 = nn.Conv2d(planes[0], planes[1], kernel_size=3, stride=1, padding=1, bias=False) | ||||||
|  |         self.bn2 = nn.BatchNorm2d(planes[1]) | ||||||
|  |         self.relu2 = nn.LeakyReLU(0.1) | ||||||
|  | 
 | ||||||
|  |     def forward(self, x): | ||||||
|  |         residual = x | ||||||
|  | 
 | ||||||
|  |         out = self.conv1(x) | ||||||
|  |         out = self.bn1(out) | ||||||
|  |         out = self.relu1(out) | ||||||
|  | 
 | ||||||
|  |         out = self.conv2(out) | ||||||
|  |         out = self.bn2(out) | ||||||
|  |         out = self.relu2(out) | ||||||
|  | 
 | ||||||
|  |         out += residual | ||||||
|  |         return out | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class DarkNet(nn.Module): | ||||||
|  |     def __init__(self, layers): | ||||||
|  |         super(DarkNet, self).__init__() | ||||||
|  |         self.inplanes = 32  # the first convolution outputs 32 channels | ||||||
|  |         # 416,416,3 -> 416,416,32 | ||||||
|  |         self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False) | ||||||
|  |         self.bn1 = nn.BatchNorm2d(self.inplanes) | ||||||
|  |         self.relu1 = nn.LeakyReLU(0.1) | ||||||
|  | 
 | ||||||
|  |         # 416,416,32 -> 208,208,64 | ||||||
|  |         self.layer1 = self._make_layer([32, 64], layers[0])  # layers holds the number of residual blocks per stage | ||||||
|  |         # 208,208,64 -> 104,104,128 | ||||||
|  |         self.layer2 = self._make_layer([64, 128], layers[1]) | ||||||
|  |         # 104,104,128 -> 52,52,256 | ||||||
|  |         self.layer3 = self._make_layer([128, 256], layers[2]) | ||||||
|  |         # 52,52,256 -> 26,26,512 | ||||||
|  |         self.layer4 = self._make_layer([256, 512], layers[3]) | ||||||
|  |         # 26,26,512 -> 13,13,1024 | ||||||
|  |         self.layer5 = self._make_layer([512, 1024], layers[4]) | ||||||
|  | 
 | ||||||
|  |         self.layers_out_filters = [64, 128, 256, 512, 1024] | ||||||
|  | 
 | ||||||
|  |         # weight initialization | ||||||
|  |         for m in self.modules(): | ||||||
|  |             if isinstance(m, nn.Conv2d): | ||||||
|  |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels | ||||||
|  |                 m.weight.data.normal_(0, math.sqrt(2. / n)) | ||||||
|  |             elif isinstance(m, nn.BatchNorm2d): | ||||||
|  |                 m.weight.data.fill_(1) | ||||||
|  |                 m.bias.data.zero_() | ||||||
|  | 
 | ||||||
|  |     # ---------------------------------------------------------------------# | ||||||
|  |     #   Each layer first downsamples with a stride-2 3x3 convolution, | ||||||
|  |     #   then stacks the residual blocks | ||||||
|  |     # ---------------------------------------------------------------------# | ||||||
|  |     def _make_layer(self, planes, blocks): | ||||||
|  |         layers = [] | ||||||
|  |         # downsampling: stride 2, kernel size 3. _make_layer first builds one downsampling conv, then the repeated residual blocks | ||||||
|  |         layers.append(("ds_conv", nn.Conv2d(self.inplanes, planes[1], kernel_size=3, stride=2, padding=1, bias=False))) | ||||||
|  |         layers.append(("ds_bn", nn.BatchNorm2d(planes[1]))) | ||||||
|  |         layers.append(("ds_relu", nn.LeakyReLU(0.1))) | ||||||
|  |         # append the residual blocks | ||||||
|  |         self.inplanes = planes[1]  # this stage's output channels become the next stage's input channels | ||||||
|  |         for i in range(0, blocks): | ||||||
|  |             layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes))) | ||||||
|  |         return nn.Sequential(OrderedDict(layers)) | ||||||
|  | 
 | ||||||
|  |     def forward(self, x): | ||||||
|  |         x = self.conv1(x) | ||||||
|  |         x = self.bn1(x) | ||||||
|  |         x = self.relu1(x) | ||||||
|  | 
 | ||||||
|  |         x = self.layer1(x) | ||||||
|  |         x = self.layer2(x) | ||||||
|  |         out3 = self.layer3(x) | ||||||
|  |         out4 = self.layer4(out3) | ||||||
|  |         out5 = self.layer5(out4) | ||||||
|  | 
 | ||||||
|  |         return out3, out4, out5 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def darknet53(): | ||||||
|  |     model = DarkNet([1, 2, 8, 8, 4]) | ||||||
|  |     return model | ||||||
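Sanity check of the three output scales, assuming a 416x416 input as in the comments above:

    import torch

    model = darknet53()
    out3, out4, out5 = model(torch.randn(1, 3, 416, 416))
    print(out3.shape, out4.shape, out5.shape)
    # torch.Size([1, 256, 52, 52]) torch.Size([1, 512, 26, 26]) torch.Size([1, 1024, 13, 13])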
							
								
								
									
111  nets/yolo.py  Normal file
							| @ -0,0 +1,111 @@ | |||||||
|  | from collections import OrderedDict | ||||||
|  | 
 | ||||||
|  | import torch | ||||||
|  | import torch.nn as nn | ||||||
|  | 
 | ||||||
|  | from nets.darknet import darknet53 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def conv2d(filter_in, filter_out, kernel_size): | ||||||
|  |     pad = (kernel_size - 1) // 2 if kernel_size else 0 | ||||||
|  |     return nn.Sequential(OrderedDict([ | ||||||
|  |         ("conv", nn.Conv2d(filter_in, filter_out, kernel_size=kernel_size, stride=1, padding=pad, bias=False)), | ||||||
|  |         ("bn", nn.BatchNorm2d(filter_out)), | ||||||
|  |         ("relu", nn.LeakyReLU(0.1)), | ||||||
|  |     ])) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # ------------------------------------------------------------------------# | ||||||
|  | #   make_last_layers contains seven convolutions in total: the first five | ||||||
|  | #   extract features, the last two produce the yolo head's predictions | ||||||
|  | # ------------------------------------------------------------------------# | ||||||
|  | def make_last_layers(filters_list, in_filters, out_filter): | ||||||
|  |     m = nn.Sequential( | ||||||
|  |         conv2d(in_filters, filters_list[0], 1),  # alternating 1x1 convs adjust the channel count and fuse information across channels | ||||||
|  |         conv2d(filters_list[0], filters_list[1], 3), | ||||||
|  |         conv2d(filters_list[1], filters_list[0], 1), | ||||||
|  |         conv2d(filters_list[0], filters_list[1], 3), | ||||||
|  |         conv2d(filters_list[1], filters_list[0], 1), | ||||||
|  |         conv2d(filters_list[0], filters_list[1], 3), | ||||||
|  |         nn.Conv2d(filters_list[1], out_filter, kernel_size=1, stride=1, padding=0, bias=True) | ||||||
|  |     ) | ||||||
|  |     return m | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class YoloBody(nn.Module): | ||||||
|  |     def __init__(self, anchors_mask, num_classes, pretrained=False): | ||||||
|  |         super(YoloBody, self).__init__() | ||||||
|  |         self.width = 416  # set temporarily; assumed network input width | ||||||
|  |         self.height = 416  # set temporarily; assumed network input height | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         #   build the darknet53 backbone | ||||||
|  |         #   it yields three feature maps, with shapes: | ||||||
|  |         #   52,52,256 | ||||||
|  |         #   26,26,512 | ||||||
|  |         #   13,13,1024 | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         self.backbone = darknet53() | ||||||
|  |         if pretrained:  # load pretrained weights; darknet53 was pretrained as a classifier | ||||||
|  |             self.backbone.load_state_dict(torch.load("model_data/darknet53_backbone_weights.pth")) | ||||||
|  | 
 | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         #   out_filters : [64, 128, 256, 512, 1024] | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         out_filters = self.backbone.layers_out_filters | ||||||
|  | 
 | ||||||
|  |         # ------------------------------------------------------------------------# | ||||||
|  |         #   compute the yolo head's output channel count; for the voc dataset | ||||||
|  |         #   final_out_filter0 = final_out_filter1 = final_out_filter2 = 75 | ||||||
|  |         # ------------------------------------------------------------------------#  len(anchors_mask[0]) is 3 | ||||||
|  |         self.last_layer0 = make_last_layers([512, 1024], out_filters[-1], len(anchors_mask[0]) * (num_classes + 5)) | ||||||
|  | 
 | ||||||
|  |         self.last_layer1_conv = conv2d(512, 256, 1) | ||||||
|  |         self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest') | ||||||
|  |         self.last_layer1 = make_last_layers([256, 512], out_filters[-2] + 256, len(anchors_mask[1]) * (num_classes + 5)) | ||||||
|  | 
 | ||||||
|  |         self.last_layer2_conv = conv2d(256, 128, 1) | ||||||
|  |         self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest') | ||||||
|  |         self.last_layer2 = make_last_layers([128, 256], out_filters[-3] + 128, len(anchors_mask[2]) * (num_classes + 5)) | ||||||
|  | 
 | ||||||
|  |     def forward(self, x): | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         #   obtain the three feature maps; their shapes are: | ||||||
|  |         #   52,52,256; 26,26,512; 13,13,1024 | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         x2, x1, x0 = self.backbone(x) | ||||||
|  | 
 | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         #   first output feature map | ||||||
|  |         #   out0 = (batch_size,255,13,13) | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 | ||||||
|  |         out0_branch = self.last_layer0[:5](x0) | ||||||
|  |         out0 = self.last_layer0[5:](out0_branch)  # e.g. 8, 75, 13, 13; the initial batch size of 2 was only for testing, not real data | ||||||
|  | 
 | ||||||
|  |         # 13,13,512 -> 13,13,256 -> 26,26,256 | ||||||
|  |         x1_in = self.last_layer1_conv(out0_branch)  # branch used for feature fusion | ||||||
|  |         x1_in = self.last_layer1_upsample(x1_in) | ||||||
|  | 
 | ||||||
|  |         # 26,26,256 + 26,26,512 -> 26,26,768 | ||||||
|  |         x1_in = torch.cat([x1_in, x1], 1) | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         #   second output feature map | ||||||
|  |         #   out1 = (batch_size,255,26,26) | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         # 26,26,768 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256 | ||||||
|  |         out1_branch = self.last_layer1[:5](x1_in) | ||||||
|  |         out1 = self.last_layer1[5:](out1_branch) | ||||||
|  | 
 | ||||||
|  |         # 26,26,256 -> 26,26,128 -> 52,52,128 | ||||||
|  |         x2_in = self.last_layer2_conv(out1_branch)  # fuse | ||||||
|  |         x2_in = self.last_layer2_upsample(x2_in) | ||||||
|  | 
 | ||||||
|  |         # 52,52,128 + 52,52,256 -> 52,52,384 | ||||||
|  |         x2_in = torch.cat([x2_in, x2], 1) | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         #   third output feature map | ||||||
|  |         #   out2 = (batch_size,255,52,52) | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         # 52,52,384 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128 | ||||||
|  |         out2 = self.last_layer2(x2_in) | ||||||
|  |         return out0, out1, out2 | ||||||
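For a VOC-style setup (20 classes) each head emits 3 * (20 + 5) = 75 channels; a hypothetical shape check:

    import torch

    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    net = YoloBody(anchors_mask, num_classes=20)
    out0, out1, out2 = net(torch.randn(1, 3, 416, 416))
    print(out0.shape, out1.shape, out2.shape)
    # torch.Size([1, 75, 13, 13]) torch.Size([1, 75, 26, 26]) torch.Size([1, 75, 52, 52])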
							
								
								
									
488  nets/yolo_training.py  Normal file
							| @ -0,0 +1,488 @@ | |||||||
|  | import math | ||||||
|  | from functools import partial | ||||||
|  | 
 | ||||||
|  | import numpy as np | ||||||
|  | import torch | ||||||
|  | import torch.nn as nn | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class YOLOLoss(nn.Module): | ||||||
|  |     def __init__(self, anchors, num_classes, input_shape, cuda, anchors_mask=[[6, 7, 8], [3, 4, 5], [0, 1, 2]]): | ||||||
|  |         super(YOLOLoss, self).__init__() | ||||||
|  |         # -----------------------------------------------------------# | ||||||
|  |         #   the 13x13 feature map uses anchors [116,90],[156,198],[373,326] | ||||||
|  |         #   the 26x26 feature map uses anchors [30,61],[62,45],[59,119] | ||||||
|  |         #   the 52x52 feature map uses anchors [10,13],[16,30],[33,23] | ||||||
|  |         # -----------------------------------------------------------# | ||||||
|  |         self.anchors = anchors | ||||||
|  |         self.num_classes = num_classes | ||||||
|  |         self.bbox_attrs = 5 + num_classes | ||||||
|  |         self.input_shape = input_shape | ||||||
|  |         self.anchors_mask = anchors_mask | ||||||
|  | 
 | ||||||
|  |         self.giou = True | ||||||
|  |         self.balance = [0.4, 1.0, 4] | ||||||
|  |         self.box_ratio = 0.05 | ||||||
|  |         self.obj_ratio = 5 * (input_shape[0] * input_shape[1]) / (416 ** 2) | ||||||
|  |         self.cls_ratio = 1 * (num_classes / 80) | ||||||
|  | 
 | ||||||
|  |         self.ignore_threshold = 0.5 | ||||||
|  |         self.cuda = cuda | ||||||
|  | 
 | ||||||
|  |     def clip_by_tensor(self, t, t_min, t_max): | ||||||
|  |         t = t.float() | ||||||
|  |         result = (t >= t_min).float() * t + (t < t_min).float() * t_min  # elementwise: keep t where t >= t_min, otherwise t_min | ||||||
|  |         result = (result <= t_max).float() * result + (result > t_max).float() * t_max | ||||||
|  |         return result | ||||||
|  | 
 | ||||||
|  |     def MSELoss(self, pred, target): | ||||||
|  |         return torch.pow(pred - target, 2) | ||||||
|  | 
 | ||||||
|  |     def BCELoss(self, pred, target): | ||||||
|  |         epsilon = 1e-7 | ||||||
|  |         pred = self.clip_by_tensor(pred, epsilon, 1.0 - epsilon)  # clamp into [epsilon, 1.0 - epsilon] so log() stays finite | ||||||
|  |         output = - target * torch.log(pred) - (1.0 - target) * torch.log(1.0 - pred) | ||||||
|  |         return output | ||||||
|  | 
 | ||||||
|  |     def box_giou(self, b1, b2): | ||||||
|  |         """ | ||||||
|  |         Inputs: | ||||||
|  |         ---------- | ||||||
|  |         b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh | ||||||
|  |         b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh | ||||||
|  | 
 | ||||||
|  |         Returns: | ||||||
|  |         ------- | ||||||
|  |         giou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1) | ||||||
|  |         """ | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         #   top-left and bottom-right corners of the predicted boxes | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         b1_xy = b1[..., :2] | ||||||
|  |         b1_wh = b1[..., 2:4] | ||||||
|  |         b1_wh_half = b1_wh / 2. | ||||||
|  |         b1_mins = b1_xy - b1_wh_half | ||||||
|  |         b1_maxes = b1_xy + b1_wh_half | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         #   top-left and bottom-right corners of the ground-truth boxes | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         b2_xy = b2[..., :2] | ||||||
|  |         b2_wh = b2[..., 2:4] | ||||||
|  |         b2_wh_half = b2_wh / 2. | ||||||
|  |         b2_mins = b2_xy - b2_wh_half | ||||||
|  |         b2_maxes = b2_xy + b2_wh_half | ||||||
|  | 
 | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         #   IoU between every ground-truth and predicted box | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         intersect_mins = torch.max(b1_mins, b2_mins) | ||||||
|  |         intersect_maxes = torch.min(b1_maxes, b2_maxes) | ||||||
|  |         intersect_wh = torch.max(intersect_maxes - intersect_mins, torch.zeros_like(intersect_maxes)) | ||||||
|  |         intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1] | ||||||
|  |         b1_area = b1_wh[..., 0] * b1_wh[..., 1] | ||||||
|  |         b2_area = b2_wh[..., 0] * b2_wh[..., 1] | ||||||
|  |         union_area = b1_area + b2_area - intersect_area | ||||||
|  |         iou = intersect_area / union_area | ||||||
|  | 
 | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         #   corners of the smallest box enclosing both boxes | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         enclose_mins = torch.min(b1_mins, b2_mins) | ||||||
|  |         enclose_maxes = torch.max(b1_maxes, b2_maxes) | ||||||
|  |         enclose_wh = torch.max(enclose_maxes - enclose_mins, torch.zeros_like(intersect_maxes)) | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         #   area of the enclosing box, used by the giou penalty term | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1] | ||||||
|  |         giou = iou - (enclose_area - union_area) / enclose_area | ||||||
|  | 
 | ||||||
|  |         return giou | ||||||
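A hand-checkable example of giou = iou - (enclose_area - union_area) / enclose_area: two unit squares offset by half a box have IoU 1/3, and their enclosing box exactly equals their union, so the penalty term is zero (the YOLOLoss arguments below are placeholder values, only needed to construct the object):

    import torch

    loss = YOLOLoss(anchors=[[10, 13]], num_classes=20, input_shape=[416, 416], cuda=False)
    b1 = torch.tensor([0.5, 0.5, 1.0, 1.0]).view(1, 1, 1, 1, 4)  # xywh
    b2 = torch.tensor([1.0, 0.5, 1.0, 1.0]).view(1, 1, 1, 1, 4)  # shifted right by half a box
    print(loss.box_giou(b1, b2))  # tensor([[[[0.3333]]]]): IoU 1/3, no enclosing-box penalty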
|  | 
 | ||||||
|  |     def forward(self, l, input, targets=None): | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         #   l is the index of the feature map currently being processed | ||||||
|  |         #   input has shape  bs, 3*(5+num_classes), 13, 13 | ||||||
|  |         #                    bs, 3*(5+num_classes), 26, 26 | ||||||
|  |         #                    bs, 3*(5+num_classes), 52, 52 | ||||||
|  |         #   targets are the ground-truth boxes | ||||||
|  |         # ----------------------------------------------------# | ||||||
|  |         # --------------------------------# | ||||||
|  |         #   number of images, and height and width of the feature map | ||||||
|  |         #   (e.g. 13 and 13) | ||||||
|  |         # --------------------------------# | ||||||
|  |         bs = input.size(0) | ||||||
|  |         in_h = input.size(2) | ||||||
|  |         in_w = input.size(3) | ||||||
|  |         # -----------------------------------------------------------------------# | ||||||
|  |         #   compute the stride: | ||||||
|  |         #   how many input-image pixels each feature point corresponds to | ||||||
|  |         #   for a 13x13 feature map, one feature point covers 32 input pixels | ||||||
|  |         #   for a 26x26 feature map, one feature point covers 16 input pixels | ||||||
|  |         #   for a 52x52 feature map, one feature point covers 8 input pixels | ||||||
|  |         #   stride_h = stride_w = 32, 16, or 8 | ||||||
|  |         # -----------------------------------------------------------------------# | ||||||
|  |         stride_h = self.input_shape[0] / in_h | ||||||
|  |         stride_w = self.input_shape[1] / in_w | ||||||
|  |         # -------------------------------------------------# | ||||||
|  |         #   rescale the anchors so scaled_anchors is expressed relative to this feature map | ||||||
|  |         # -------------------------------------------------# | ||||||
|  |         scaled_anchors = [(a_w / stride_w, a_h / stride_h) for a_w, a_h in self.anchors]  # scale the anchors down to the output feature map's resolution | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         #   there are three inputs in total; their shapes are | ||||||
|  |         #   bs, 3*(5+num_classes), 13, 13 => batch_size, 3, 13, 13, 5 + num_classes | ||||||
|  |         #   batch_size, 3, 26, 26, 5 + num_classes | ||||||
|  |         #   batch_size, 3, 52, 52, 5 + num_classes | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         prediction = input.view(bs, len(self.anchors_mask[l]), self.bbox_attrs, in_h, in_w).permute( | ||||||
|  |             0, 1, 3, 4, 2).contiguous()  # batch_size, 3 anchors, h, w, 25 output values per anchor | ||||||
|  | 
 | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         #   adjustment parameters for the anchor center positions | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         x = torch.sigmoid(prediction[..., 0])  # prediction[..., 0] has shape e.g. 8, 3, 13, 13: the tx coordinate | ||||||
|  |         y = torch.sigmoid(prediction[..., 1])  # ty | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         #   adjustment parameters for the anchor widths and heights | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         w = prediction[..., 2]  # tw | ||||||
|  |         h = prediction[..., 3]  # th | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         #   objectness confidence: is an object present | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         conf = torch.sigmoid(prediction[..., 4])  # prediction[..., 4]: objectness | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         #   class confidences | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         pred_cls = torch.sigmoid(prediction[..., 5:]) | ||||||
|  | 
 | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         #   build the targets the network should predict. y_true is the rebuilt ground-truth label (e.g. 8, 3, 13, 13, 25); noobj_mask is 0 where there is a target and 1 elsewhere; box_loss_scale records box area | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         y_true, noobj_mask, box_loss_scale = self.get_target(l, targets, scaled_anchors, in_h, in_w) | ||||||
|  |         # in y_true the ground-truth boxes are converted to the same format as the network output: coordinates at the output feature map's resolution, class attached to the cell containing the box | ||||||
|  |         # ---------------------------------------------------------------# | ||||||
|  |         #   decode the predictions and measure their overlap with the ground truth; | ||||||
|  |         #   if the overlap is too large the point is ignored, since such points are already fairly | ||||||
|  |         #   accurate and make poor negatives  # l is the feature map index; pred_boxes are the decoded network predictions | ||||||
|  |         # ----------------------------------------------------------------# | ||||||
|  |         noobj_mask, pred_boxes = self.get_ignore(l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask) | ||||||
|  | 
 | ||||||
|  |         if self.cuda: | ||||||
|  |             y_true = y_true.type_as(x) | ||||||
|  |             noobj_mask = noobj_mask.type_as(x) | ||||||
|  |             box_loss_scale = box_loss_scale.type_as(x) | ||||||
|  |         # --------------------------------------------------------------------------# | ||||||
|  |         #   box_loss_scale is the product of the ground-truth box's width and height, both in 0-1, so the product is too. | ||||||
|  |         #   2 - product: the larger the box, the smaller its weight; small boxes weigh more. | ||||||
|  |         # --------------------------------------------------------------------------# | ||||||
|  |         box_loss_scale = 2 - box_loss_scale | ||||||
|  | 
 | ||||||
|  |         loss = 0 | ||||||
|  |         obj_mask = y_true[..., 4] == 1 | ||||||
|  |         n = torch.sum(obj_mask) | ||||||
|  |         if n != 0: | ||||||
|  |             if self.giou: | ||||||
|  |                 # ---------------------------------------------------------------# | ||||||
|  |                 #   giou between predictions and ground truth | ||||||
|  |                 # ----------------------------------------------------------------# | ||||||
|  |                 giou = self.box_giou(pred_boxes, y_true[..., :4]).type_as(x) | ||||||
|  |                 loss_loc = torch.mean((1 - giou)[obj_mask])  # GIoU is used as the localization loss here, instead of the paper's MSE | ||||||
|  |             else: | ||||||
|  |                 # -----------------------------------------------------------# | ||||||
|  |                 #   loss on the center offsets; BCELoss works a bit better here | ||||||
|  |                 # -----------------------------------------------------------# | ||||||
|  |                 loss_x = torch.mean(self.BCELoss(x[obj_mask], y_true[..., 0][obj_mask]) * box_loss_scale[obj_mask]) | ||||||
|  |                 loss_y = torch.mean(self.BCELoss(y[obj_mask], y_true[..., 1][obj_mask]) * box_loss_scale[obj_mask]) | ||||||
|  |                 # -----------------------------------------------------------# | ||||||
|  |                 #   loss on the width/height adjustments | ||||||
|  |                 # -----------------------------------------------------------# | ||||||
|  |                 loss_w = torch.mean(self.MSELoss(w[obj_mask], y_true[..., 2][obj_mask]) * box_loss_scale[obj_mask]) | ||||||
|  |                 loss_h = torch.mean(self.MSELoss(h[obj_mask], y_true[..., 3][obj_mask]) * box_loss_scale[obj_mask]) | ||||||
|  |                 loss_loc = (loss_x + loss_y + loss_h + loss_w) * 0.1 | ||||||
|  |             # pred_cls[obj_mask]: (number of boxes containing objects) x num_classes attribute values | ||||||
|  |             loss_cls = torch.mean(self.BCELoss(pred_cls[obj_mask], y_true[..., 5:][obj_mask]))  # classification loss for the targets | ||||||
|  |             loss += loss_loc * self.box_ratio + loss_cls * self.cls_ratio | ||||||
|  | 
 | ||||||
|  |         loss_conf = torch.mean(self.BCELoss(conf, obj_mask.type_as(conf))[noobj_mask.bool() | obj_mask])  # objectness loss; predictions that overlap a target strongly but are not the best match are ignored | ||||||
|  |         loss += loss_conf * self.balance[l] * self.obj_ratio  # self.balance[l] weights the layers differently ([0.4, 1.0, 4]), giving small-object layers a larger loss weight | ||||||
|  |         # if n != 0: | ||||||
|  |         #     print(loss_loc * self.box_ratio, loss_cls * self.cls_ratio, loss_conf * self.balance[l] * self.obj_ratio) | ||||||
|  |         return loss | ||||||
|  | 
 | ||||||
|  |     def calculate_iou(self, _box_a, _box_b): | ||||||
|  |         # -----------------------------------------------------------# | ||||||
|  |         #   corners of the ground-truth boxes, computed from xywh around the center | ||||||
|  |         # -----------------------------------------------------------# | ||||||
|  |         b1_x1, b1_x2 = _box_a[:, 0] - _box_a[:, 2] / 2, _box_a[:, 0] + _box_a[:, 2] / 2 | ||||||
|  |         b1_y1, b1_y2 = _box_a[:, 1] - _box_a[:, 3] / 2, _box_a[:, 1] + _box_a[:, 3] / 2 | ||||||
|  |         # -----------------------------------------------------------# | ||||||
|  |         #   corners of the predicted boxes obtained from the anchors | ||||||
|  |         # -----------------------------------------------------------# | ||||||
|  |         b2_x1, b2_x2 = _box_b[:, 0] - _box_b[:, 2] / 2, _box_b[:, 0] + _box_b[:, 2] / 2 | ||||||
|  |         b2_y1, b2_y2 = _box_b[:, 1] - _box_b[:, 3] / 2, _box_b[:, 1] + _box_b[:, 3] / 2 | ||||||
|  | 
 | ||||||
|  |         # -----------------------------------------------------------# | ||||||
|  |         #   convert both ground-truth and predicted boxes to corner (x1, y1, x2, y2) form | ||||||
|  |         # -----------------------------------------------------------# | ||||||
|  |         box_a = torch.zeros_like(_box_a) | ||||||
|  |         box_b = torch.zeros_like(_box_b) | ||||||
|  |         box_a[:, 0], box_a[:, 1], box_a[:, 2], box_a[:, 3] = b1_x1, b1_y1, b1_x2, b1_y2 | ||||||
|  |         box_b[:, 0], box_b[:, 1], box_b[:, 2], box_b[:, 3] = b2_x1, b2_y1, b2_x2, b2_y2 | ||||||
|  | 
 | ||||||
|  |         # ----------------------------------------------------------- # | ||||||
|  |         #   A is the number of ground-truth boxes, B the number of anchors | ||||||
|  |         # ----------------------------------------------------------- # | ||||||
|  |         A = box_a.size(0) | ||||||
|  |         B = box_b.size(0) | ||||||
|  | 
 | ||||||
|  |         # ----------------------------------------------------------- # | ||||||
|  |         #   compute the intersection area; box_a holds the ground-truth corners, box_b the anchor corners | ||||||
|  |         #   box_a[:, 2:].unsqueeze(1).expand(A, B, 2) expands (A, 1, 2) to (A, B, 2): each of the A boxes repeated B times | ||||||
|  |         #   box_b[:, 2:].unsqueeze(0).expand(A, B, 2) expands (1, B, 2) to (A, B, 2): the B anchors repeated A times | ||||||
|  |         # ----------------------------------------------------------- # | ||||||
|  | 
 | ||||||
|  |         # each ground-truth box is repeated len(anchors) times and compared against every anchor | ||||||
|  |         max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),  # smaller of the two bottom-right corners | ||||||
|  |                            box_b[:, 2:].unsqueeze(0).expand(A, B, 2))  # shape (A, B, 2) | ||||||
|  |         min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),  # larger of the two top-left corners | ||||||
|  |                            box_b[:, :2].unsqueeze(0).expand(A, B, 2))  # shape (A, B, 2) | ||||||
|  |         inter = torch.clamp((max_xy - min_xy),  # clamping at 0 handles non-overlapping boxes: their intersection becomes 0 | ||||||
|  |                             min=0)  # min=0, no upper bound; the subtraction yields width and height | ||||||
|  |         inter = inter[:, :, 0] * inter[:, :, 1]  # intersection area of every ground-truth/anchor pair | ||||||
|  |         # ----------------------------------------------------------- # | ||||||
|  |         #   areas of the ground-truth and anchor boxes | ||||||
|  |         # ----------------------------------------------------------- # | ||||||
|  |         area_a = ((box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])).unsqueeze(1).expand_as( | ||||||
|  |             inter)  # [A, B]: A values, each repeated B times | ||||||
|  |         area_b = ((box_b[:, 2] - box_b[:, 0]) * (box_b[:, 3] - box_b[:, 1])).unsqueeze(0).expand_as( | ||||||
|  |             inter)  # [A, B]: B values, repeated A times | ||||||
|  |         # ----------------------------------------------------------- # | ||||||
|  |         #   compute the IoU | ||||||
|  |         # ----------------------------------------------------------- # | ||||||
|  |         union = area_a + area_b - inter | ||||||
|  |         return inter / union  # [A,B] | ||||||
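A toy check of the broadcasting (one ground-truth box against two anchors, all centered at the origin; the YOLOLoss constructor arguments are placeholders):

    import torch

    loss = YOLOLoss(anchors=[[10, 13]], num_classes=20, input_shape=[416, 416], cuda=False)
    gt = torch.tensor([[0.0, 0.0, 4.0, 4.0]])       # one box, xywh
    anchors = torch.tensor([[0.0, 0.0, 4.0, 4.0],   # identical box
                            [0.0, 0.0, 2.0, 2.0]])  # a quarter of the area
    print(loss.calculate_iou(gt, anchors))          # tensor([[1.0000, 0.2500]])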
|  | 
 | ||||||
|  |     def get_target(self, l, targets, anchors, in_h, in_w): | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         #   number of images in the batch | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         bs = len(targets) | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         #   every grid cell gets a mark; used to select which anchors contain no object | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         noobj_mask = torch.ones(bs, len(self.anchors_mask[l]), in_h, in_w, requires_grad=False) | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         #   makes the network pay more attention to small objects | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         box_loss_scale = torch.zeros(bs, len(self.anchors_mask[l]), in_h, in_w, requires_grad=False) | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         #   batch_size, 3, 13, 13, 5 + num_classes | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         y_true = torch.zeros(bs, len(self.anchors_mask[l]), in_h, in_w, self.bbox_attrs, requires_grad=False) | ||||||
|  |         for b in range(bs):  # process each image separately | ||||||
|  |             if len(targets[b]) == 0:  # targets are the ground-truth boxes | ||||||
|  |                 continue | ||||||
|  |             batch_target = torch.zeros_like(targets[b])  # convert targets from 0-1 coordinates to feature-map-sized coordinates | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             #   center points of the positives on the feature map; dims 0,1 hold the center, dims 2,3 the width and height | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             batch_target[:, [0, 2]] = targets[b][:, [0, 2]] * in_w  # recover the two x quantities at the feature map's resolution (e.g. 13x13) from the normalized box | ||||||
|  |             batch_target[:, [1, 3]] = targets[b][:, [1, 3]] * in_h | ||||||
|  |             batch_target[:, 4] = targets[b][:, 4] | ||||||
|  |             batch_target = batch_target.cpu()  # copied from targets (which live on cuda), so move to the cpu once | ||||||
|  | 
 | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             #   rewrite the ground-truth boxes so they can be compared at (0, 0, w, h) | ||||||
|  |             #   num_true_box, 4  # take dims 2,3 (width and height) and prepend two zeros | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             gt_box = torch.FloatTensor(torch.cat((torch.zeros((batch_target.size(0), 2)), batch_target[:, 2:4]), 1)) | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             #   rewrite the anchors the same way | ||||||
|  |             #   9, 4    two zeros prepended before each anchor's size | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             anchor_shapes = torch.FloatTensor( | ||||||
|  |                 torch.cat((torch.zeros((len(anchors), 2)), torch.FloatTensor(anchors)), 1)) | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             #   compute the IoU | ||||||
|  |             #   self.calculate_iou(gt_box, anchor_shapes) = [num_true_box, 9]: overlap of each ground-truth box with the 9 anchors | ||||||
|  |             #   best_ns: | ||||||
|  |             #   [max overlap per ground-truth box, index of the most overlapping anchor]   # the IoU result has shape num_true_box x len(anchors) | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             best_ns = torch.argmax(self.calculate_iou(gt_box, anchor_shapes), dim=-1)  # IoU of every ground-truth box with all anchors; take each box's best-matching anchor index | ||||||
|  |             # for each ground-truth box's best anchor index, find its position among this layer's 3 anchors | ||||||
|  |             for t, best_n in enumerate(best_ns):  # l is the index of the final output feature map | ||||||
|  |                 if best_n not in self.anchors_mask[l]:  # anchors_mask specifies which 3 anchors this feature map uses | ||||||
|  |                     continue | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 #   determine which of this feature point's anchors the best match is; l is the output feature map index | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 k = self.anchors_mask[l].index(best_n)  # position of the anchor among this layer's 3 anchors | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 #   which grid cell the ground-truth box belongs to: the center is at feature-map resolution, and floor gives the cell's top-left index | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 i = torch.floor(batch_target[t, 0]).long()  # t is the index of the current ground-truth box | ||||||
|  |                 j = torch.floor(batch_target[t, 1]).long() | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 #   class of the ground-truth box | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 c = batch_target[t, 4].long() | ||||||
|  | 
 | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 #   noobj_mask marks feature points with no object; b is the batch index, k the anchor index | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 noobj_mask[b, k, j, i] = 0 | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 #   tx, ty are the ground-truth center adjustment parameters | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 if not self.giou:  # not taken here, since self.giou is set to True above | ||||||
|  |                     # ----------------------------------------# | ||||||
|  |                     #   tx, ty are the ground-truth center adjustment parameters | ||||||
|  |                     # ----------------------------------------# | ||||||
|  |                     y_true[b, k, j, i, 0] = batch_target[t, 0] - i.float() | ||||||
|  |                     y_true[b, k, j, i, 1] = batch_target[t, 1] - j.float() | ||||||
|  |                     y_true[b, k, j, i, 2] = math.log(batch_target[t, 2] / anchors[best_n][0]) | ||||||
|  |                     y_true[b, k, j, i, 3] = math.log(batch_target[t, 3] / anchors[best_n][1]) | ||||||
|  |                     y_true[b, k, j, i, 4] = 1 | ||||||
|  |                     y_true[b, k, j, i, c + 5] = 1  # set the class marker | ||||||
|  |                 else: | ||||||
|  |                     # ----------------------------------------# | ||||||
|  |                     #   here y_true is the regenerated label; t is the t-th ground-truth box of the current image | ||||||
|  |                     # ----------------------------------------# | ||||||
|  |                     y_true[b, k, j, i, 0] = batch_target[t, 0] | ||||||
|  |                     y_true[b, k, j, i, 1] = batch_target[t, 1] | ||||||
|  |                     y_true[b, k, j, i, 2] = batch_target[t, 2] | ||||||
|  |                     y_true[b, k, j, i, 3] = batch_target[t, 3] | ||||||
|  |                     y_true[b, k, j, i, 4] = 1  # an object is present | ||||||
|  |                     y_true[b, k, j, i, c + 5] = 1  # c is the class index | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 #   ratio used to weight the xywh loss: | ||||||
|  |                 #   large objects get a small loss weight, small objects a large one | ||||||
|  |                 # ----------------------------------------# | ||||||
|  |                 box_loss_scale[b, k, j, i] = batch_target[t, 2] * batch_target[t, 3] / in_w / in_h  # the area reflects object size, normalized back into 0-1 | ||||||
|  |         return y_true, noobj_mask, box_loss_scale | ||||||
|  | 
 | ||||||
|  |     def get_ignore(self, l, x, y, h, w, targets, scaled_anchors, in_h, in_w, noobj_mask): | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         #   number of images in the batch | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         bs = len(targets) | ||||||
|  | 
 | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         #   build the grid of anchor centers (cell top-left corners): torch.linspace(0, in_w - 1, in_w) splits [0, in_w - 1] into in_w points; .repeat(in_h, 1) repeats them in_h times along dim 0 and once along dim 1 | ||||||
|  |         # -----------------------------------------------------# | ||||||
|  |         grid_x = torch.linspace(0, in_w - 1, in_w).repeat(in_h, 1).repeat( | ||||||
|  |             int(bs * len(self.anchors_mask[l])), 1, 1).view(x.shape).type_as(x)  # read the repeat arguments right to left: the last two dims tile along the vertical and horizontal axes | ||||||
|  |         grid_y = torch.linspace(0, in_h - 1, in_h).repeat(in_w, 1).t().repeat( | ||||||
|  |             int(bs * len(self.anchors_mask[l])), 1, 1).view(y.shape).type_as(x) | ||||||
|  | 
 | ||||||
|  |         # generate the anchor widths and heights | ||||||
|  |         scaled_anchors_l = np.array(scaled_anchors)[self.anchors_mask[l]]  # pick out the values of this layer's 3 anchors | ||||||
|  |         anchor_w = torch.Tensor(scaled_anchors_l).index_select(1, torch.LongTensor([0])).type_as(x)  # select the widths (column 0) along dim 1 | ||||||
|  |         anchor_h = torch.Tensor(scaled_anchors_l).index_select(1, torch.LongTensor([1])).type_as(x) | ||||||
|  | 
 | ||||||
|  |         anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(w.shape)  # each anchor width tiled over a full in_h x in_w map, 3 distinct maps, repeated for the batch | ||||||
|  |         anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, in_h * in_w).view(h.shape) | ||||||
|  |         # -------------------------------------------------------# | ||||||
|  |         #   compute the adjusted anchor centers and sizes; x is output attribute 0, the sigmoided x coordinate | ||||||
|  |         # -------------------------------------------------------# | ||||||
|  |         pred_boxes_x = torch.unsqueeze(x + grid_x, -1) | ||||||
|  |         pred_boxes_y = torch.unsqueeze(y + grid_y, -1) | ||||||
|  |         pred_boxes_w = torch.unsqueeze(torch.exp(w) * anchor_w, -1) | ||||||
|  |         pred_boxes_h = torch.unsqueeze(torch.exp(h) * anchor_h, -1) | ||||||
|  |         pred_boxes = torch.cat([pred_boxes_x, pred_boxes_y, pred_boxes_w, pred_boxes_h], dim=-1) | ||||||
|  | 
 | ||||||
|  |         for b in range(bs):  # handle the batch one image at a time | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             #   reshape the predictions | ||||||
|  |             #   pred_boxes_for_ignore      num_anchors, 4 | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             pred_boxes_for_ignore = pred_boxes[b].view(-1, 4) | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             #   convert the ground-truth boxes to feature-map scale | ||||||
|  |             #   gt_box      num_true_box, 4 | ||||||
|  |             # -------------------------------------------------------# | ||||||
|  |             if len(targets[b]) > 0:  # if the image has targets, continue below; otherwise skip to the next image | ||||||
|  |                 batch_target = torch.zeros_like(targets[b]) | ||||||
|  |                 # -------------------------------------------------------# | ||||||
|  |                 #   center points of the positives on the feature map; the boxes stay in center/width-height form, so the end result is unchanged | ||||||
|  |                 # -------------------------------------------------------# | ||||||
|  |                 batch_target[:, [0, 2]] = targets[b][:, [0, 2]] * in_w | ||||||
|  |                 batch_target[:, [1, 3]] = targets[b][:, [1, 3]] * in_h | ||||||
|  |                 batch_target = batch_target[:, :4].type_as(x) | ||||||
|  |                 # -------------------------------------------------------# | ||||||
|  |                 #   compute the IoU | ||||||
|  |                 #   anch_ious       num_true_box, num_anchors | ||||||
|  |                 # -------------------------------------------------------# | ||||||
|  |                 anch_ious = self.calculate_iou(batch_target, pred_boxes_for_ignore)  # IoU between ground-truth and predicted boxes | ||||||
|  |                 # -------------------------------------------------------# | ||||||
|  |                 #   for each predicted box, its maximum overlap with any ground-truth box | ||||||
|  |                 #   anch_ious_max   num_anchors | ||||||
|  |                 # -------------------------------------------------------# | ||||||
|  |                 anch_ious_max, _ = torch.max(anch_ious, dim=0)  # maximum over the ground-truth dimension | ||||||
|  |                 anch_ious_max = anch_ious_max.view(pred_boxes[b].size()[:3]) | ||||||
|  |                 noobj_mask[b][anch_ious_max > self.ignore_threshold] = 0  # above the threshold the cell is ignored even if it is not the best match, so noobj is set to 0 | ||||||
|  |         return noobj_mask, pred_boxes | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def weights_init(net, init_type='normal', init_gain=0.02): | ||||||
|  |     def init_func(m): | ||||||
|  |         classname = m.__class__.__name__ | ||||||
|  |         if hasattr(m, 'weight') and classname.find('Conv') != -1: | ||||||
|  |             if init_type == 'normal': | ||||||
|  |                 torch.nn.init.normal_(m.weight.data, 0.0, init_gain) | ||||||
|  |             elif init_type == 'xavier': | ||||||
|  |                 torch.nn.init.xavier_normal_(m.weight.data, gain=init_gain) | ||||||
|  |             elif init_type == 'kaiming': | ||||||
|  |                 torch.nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') | ||||||
|  |             elif init_type == 'orthogonal': | ||||||
|  |                 torch.nn.init.orthogonal_(m.weight.data, gain=init_gain) | ||||||
|  |             else: | ||||||
|  |                 raise NotImplementedError('initialization method [%s] is not implemented' % init_type) | ||||||
|  |         elif classname.find('BatchNorm2d') != -1: | ||||||
|  |             torch.nn.init.normal_(m.weight.data, 1.0, 0.02) | ||||||
|  |             torch.nn.init.constant_(m.bias.data, 0.0) | ||||||
|  | 
 | ||||||
|  |     print('initialize network with %s type' % init_type) | ||||||
|  |     net.apply(init_func) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_lr_scheduler(lr_decay_type, lr, min_lr, total_iters, warmup_iters_ratio=0.05, warmup_lr_ratio=0.1, | ||||||
|  |                      no_aug_iter_ratio=0.05, step_num=10): | ||||||
|  |     def yolox_warm_cos_lr(lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter, iters): | ||||||
|  |         if iters <= warmup_total_iters: | ||||||
|  |             # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start | ||||||
|  |             lr = (lr - warmup_lr_start) * pow(iters / float(warmup_total_iters), 2) + warmup_lr_start | ||||||
|  |         elif iters >= total_iters - no_aug_iter: | ||||||
|  |             lr = min_lr | ||||||
|  |         else: | ||||||
|  |             lr = min_lr + 0.5 * (lr - min_lr) * ( | ||||||
|  |                     1.0 + math.cos( | ||||||
|  |                 math.pi * (iters - warmup_total_iters) / (total_iters - warmup_total_iters - no_aug_iter)) | ||||||
|  |             ) | ||||||
|  |         return lr | ||||||
|  | 
 | ||||||
|  |     def step_lr(lr, decay_rate, step_size, iters): | ||||||
|  |         if step_size < 1: | ||||||
|  |             raise ValueError("step_size must be at least 1.") | ||||||
|  |         n = iters // step_size | ||||||
|  |         out_lr = lr * decay_rate ** n | ||||||
|  |         return out_lr | ||||||
|  | 
 | ||||||
|  |     if lr_decay_type == "cos": | ||||||
|  |         warmup_total_iters = min(max(warmup_iters_ratio * total_iters, 1), 3) | ||||||
|  |         warmup_lr_start = max(warmup_lr_ratio * lr, 1e-6) | ||||||
|  |         no_aug_iter = min(max(no_aug_iter_ratio * total_iters, 1), 15) | ||||||
|  |         func = partial(yolox_warm_cos_lr, lr, min_lr, total_iters, warmup_total_iters, warmup_lr_start, no_aug_iter) | ||||||
|  |     else: | ||||||
|  |         decay_rate = (min_lr / lr) ** (1 / (step_num - 1)) | ||||||
|  |         step_size = total_iters / step_num | ||||||
|  |         func = partial(step_lr, lr, decay_rate, step_size) | ||||||
|  | 
 | ||||||
|  |     return func | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def set_optimizer_lr(optimizer, lr_scheduler_func, epoch): | ||||||
|  |     lr = lr_scheduler_func(epoch) | ||||||
|  |     for param_group in optimizer.param_groups: | ||||||
|  |         param_group['lr'] = lr | ||||||
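A brief usage sketch tying the three helpers above together. The toy model and the hyper-parameters are illustrative assumptions, not values fixed by this file:

```python
import torch

net = torch.nn.Sequential(torch.nn.Conv2d(3, 16, 3), torch.nn.BatchNorm2d(16))
weights_init(net, init_type='kaiming')  # initialise Conv and BatchNorm2d layers

optimizer = torch.optim.SGD(net.parameters(), lr=1e-2, momentum=0.9)
lr_scheduler_func = get_lr_scheduler("cos", lr=1e-2, min_lr=1e-4, total_iters=100)

for epoch in range(100):
    set_optimizer_lr(optimizer, lr_scheduler_func, epoch)  # warmup + cosine decay
    # ... one epoch of training ...
```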
							
								
								
									
patch_config.py (new file, 135 lines)
							| @ -0,0 +1,135 @@ | |||||||
|  | from torch import optim | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class BaseConfig(object): | ||||||
|  |     """ | ||||||
|  |     Default parameters for all config files. | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self): | ||||||
|  |         """ | ||||||
|  |         Set the defaults. | ||||||
|  |         """ | ||||||
|  |         # self.img_dir = "inria/Train/pos" | ||||||
|  |         # self.lab_dir = "inria/Train/pos/yolo-labels" | ||||||
|  |         self.img_dir = "cctsdb/Train/pos" | ||||||
|  |         self.lab_dir = "cctsdb/Train/labels" | ||||||
|  |         self.cfgfile = "cfg/yolo.cfg" | ||||||
|  |         self.weightfile = "weights/yolo.weights" | ||||||
|  |         self.printfile = "non_printability/30values.txt" | ||||||
|  |         self.patch_size = 300 | ||||||
|  | 
 | ||||||
|  |         self.start_learning_rate = 0.03 | ||||||
|  | 
 | ||||||
|  |         self.patch_name = 'base' | ||||||
|  | 
 | ||||||
|  |         self.scheduler_factory = lambda x: optim.lr_scheduler.ReduceLROnPlateau(x, 'min', patience=50) | ||||||
|  |         self.max_tv = 0 | ||||||
|  | 
 | ||||||
|  |         self.batch_size = 20 | ||||||
|  | 
 | ||||||
|  |         self.loss_target = lambda obj, cls: obj * cls | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class Experiment1(BaseConfig): | ||||||
|  |     """ | ||||||
|  |     Model that uses a maximum total variation; tv cannot go below this point. | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self): | ||||||
|  |         """ | ||||||
|  |         Change stuff... | ||||||
|  |         """ | ||||||
|  |         super().__init__() | ||||||
|  | 
 | ||||||
|  |         self.patch_name = 'Experiment1' | ||||||
|  |         self.max_tv = 0.165 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class Experiment2HighRes(Experiment1): | ||||||
|  |     """ | ||||||
|  |     Higher res | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self): | ||||||
|  |         """ | ||||||
|  |         Change stuff... | ||||||
|  |         """ | ||||||
|  |         super().__init__() | ||||||
|  | 
 | ||||||
|  |         self.max_tv = 0.165 | ||||||
|  |         self.patch_size = 400 | ||||||
|  |         self.patch_name = 'Exp2HighRes' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class Experiment3LowRes(Experiment1): | ||||||
|  |     """ | ||||||
|  |     Lower res | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self): | ||||||
|  |         """ | ||||||
|  |         Change stuff... | ||||||
|  |         """ | ||||||
|  |         super().__init__() | ||||||
|  | 
 | ||||||
|  |         self.max_tv = 0.165 | ||||||
|  |         self.patch_size = 100 | ||||||
|  |         self.patch_name = "Exp3LowRes" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class Experiment4ClassOnly(Experiment1): | ||||||
|  |     """ | ||||||
|  |     Only minimise class score. | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self): | ||||||
|  |         """ | ||||||
|  |         Change stuff... | ||||||
|  |         """ | ||||||
|  |         super().__init__() | ||||||
|  | 
 | ||||||
|  |         self.patch_name = 'Experiment4ClassOnly' | ||||||
|  |         self.loss_target = lambda obj, cls: cls | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class Experiment1Desktop(Experiment1): | ||||||
|  |     """ | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self): | ||||||
|  |         """ | ||||||
|  |         Change batch size. | ||||||
|  |         """ | ||||||
|  |         super().__init__() | ||||||
|  | 
 | ||||||
|  |         self.batch_size = 8 | ||||||
|  |         self.patch_size = 400 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class ReproducePaperObj(BaseConfig): | ||||||
|  |     """ | ||||||
|  |     Reproduce the results from the paper: Generate a patch that minimises object score. | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     def __init__(self): | ||||||
|  |         super().__init__() | ||||||
|  | 
 | ||||||
|  |         self.batch_size = 8 | ||||||
|  |         self.patch_size = 300 | ||||||
|  | 
 | ||||||
|  |         self.patch_name = 'ObjectOnlyPaper' | ||||||
|  |         self.max_tv = 0.165 | ||||||
|  | 
 | ||||||
|  |         self.loss_target = lambda obj, cls: obj | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | patch_configs = { | ||||||
|  |     "base": BaseConfig, | ||||||
|  |     "exp1": Experiment1, | ||||||
|  |     "exp1_des": Experiment1Desktop, | ||||||
|  |     "exp2_high_res": Experiment2HighRes, | ||||||
|  |     "exp3_low_res": Experiment3LowRes, | ||||||
|  |     "exp4_class_only": Experiment4ClassOnly, | ||||||
|  |     "paper_obj": ReproducePaperObj | ||||||
|  | } | ||||||
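For reference, a short sketch of how these configs are selected at runtime (mirroring what train_patch.py below does with its command-line argument):

```python
import patch_config

config = patch_config.patch_configs["paper_obj"]()  # instantiate ReproducePaperObj
print(config.patch_name, config.patch_size, config.batch_size)  # ObjectOnlyPaper 300 8

# loss_target decides which score the patch attacks:
obj, cls = 0.9, 0.8
print(config.loss_target(obj, cls))  # 0.9 -> objectness only
```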
							
								
								
									
predict.py (new file, 181 lines)
							| @ -0,0 +1,181 @@ | |||||||
|  | # -----------------------------------------------------------------------# | ||||||
|  | #   predict.py integrates single-image prediction, camera/video detection, FPS testing | ||||||
|  | #   and directory-sweep detection into one script; switch between them by setting mode. | ||||||
|  | # -----------------------------------------------------------------------# | ||||||
|  | import time | ||||||
|  | 
 | ||||||
|  | import cv2 | ||||||
|  | import numpy as np | ||||||
|  | from PIL import Image | ||||||
|  | 
 | ||||||
|  | from yolo import YOLO | ||||||
|  | 
 | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     yolo = YOLO() | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     #   mode selects the test mode: | ||||||
|  |     #   'predict'           single-image prediction; to modify the flow (save images, crop objects, ...) read the detailed comments below first | ||||||
|  |     #   'video'             video detection from a camera or a video file, see the comments below | ||||||
|  |     #   'fps'               FPS test, using img/street.jpg, see the comments below | ||||||
|  |     #   'dir_predict'       sweep a folder and save the detections; by default reads img/ and saves to img_out/, see the comments below | ||||||
|  |     #   'heatmap'           heat-map visualisation of the prediction results, see the comments below | ||||||
|  |     #   'export_onnx'       export the model to ONNX; requires pytorch 1.7.1 or newer | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     mode = "predict" | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     #   crop                whether to crop the detected objects after single-image prediction | ||||||
|  |     #   count               whether to count the detected objects | ||||||
|  |     #   crop and count only take effect when mode='predict' | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     crop = False | ||||||
|  |     count = False | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     #   video_path          path of the video; video_path=0 means use the camera | ||||||
|  |     #                       to detect a video file, set e.g. video_path = "xxx.mp4" to read xxx.mp4 from the root directory | ||||||
|  |     #   video_save_path     where to save the video; video_save_path="" means do not save | ||||||
|  |     #                       to save, set e.g. video_save_path = "yyy.mp4" to write yyy.mp4 in the root directory | ||||||
|  |     #   video_fps           fps of the saved video | ||||||
|  |     # | ||||||
|  |     #   video_path, video_save_path and video_fps only take effect when mode='video' | ||||||
|  |     #   to finish saving properly, exit with ctrl+c or let the video run to its last frame | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     video_path = 0 | ||||||
|  |     video_save_path = "" | ||||||
|  |     video_fps = 25.0 | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     #   test_interval       how many detections to run when measuring FPS; in theory, a larger test_interval gives a more accurate FPS | ||||||
|  |     #   fps_image_path      the image used for the FPS test | ||||||
|  |     # | ||||||
|  |     #   test_interval and fps_image_path only take effect when mode='fps' | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     test_interval = 100 | ||||||
|  |     fps_image_path = "img/street.jpg" | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     #   dir_origin_path     folder holding the images to detect | ||||||
|  |     #   dir_save_path       folder where the detected images are saved | ||||||
|  |     # | ||||||
|  |     #   dir_origin_path and dir_save_path only take effect when mode='dir_predict' | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     dir_origin_path = "img/" | ||||||
|  |     dir_save_path = "img_out/" | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     #   heatmap_save_path   where the heat map is saved, under model_data by default | ||||||
|  |     # | ||||||
|  |     #   heatmap_save_path only takes effect when mode='heatmap' | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     heatmap_save_path = "model_data/heatmap_vision.png" | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     #   simplify            run onnx-simplifier on the exported model | ||||||
|  |     #   onnx_save_path      where the onnx model is saved | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     simplify = True | ||||||
|  |     onnx_save_path = "model_data/models.onnx" | ||||||
|  | 
 | ||||||
|  |     if mode == "predict": | ||||||
|  |         ''' | ||||||
|  |         1. To save the detected image, call r_image.save("img.jpg"); edit predict.py directly. | ||||||
|  |         2. To get the coordinates of the predicted boxes, go into yolo.detect_image and read top, left, bottom, right in the drawing section. | ||||||
|  |         3. To crop out the detected objects, go into yolo.detect_image and slice the original image with the top, left, bottom, right values obtained in the drawing section. | ||||||
|  |         4. To write extra text on the output image, e.g. the count of a specific class, go into yolo.detect_image, test predicted_class in the drawing section | ||||||
|  |         (e.g. if predicted_class == 'car':), keep a counter, and draw the text with draw.text. | ||||||
|  |         ''' | ||||||
|  |         while True: | ||||||
|  |             img = input('Input image filename:') | ||||||
|  |             # img/street.jpg | ||||||
|  |             # img/street_a3.jpg | ||||||
|  |             try: | ||||||
|  |                 image = Image.open(img) | ||||||
|  |             except: | ||||||
|  |                 print('Open Error! Try again!') | ||||||
|  |                 continue | ||||||
|  |             else: | ||||||
|  |                 r_image = yolo.detect_image(image, crop=crop, count=count) | ||||||
|  |                 # r_image.show() | ||||||
|  |                 r_image.save("duffision.png") | ||||||
|  | 
 | ||||||
|  |     elif mode == "video": | ||||||
|  |         capture = cv2.VideoCapture(video_path) | ||||||
|  |         if video_save_path != "": | ||||||
|  |             fourcc = cv2.VideoWriter_fourcc(*'XVID') | ||||||
|  |             size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))) | ||||||
|  |             out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size) | ||||||
|  | 
 | ||||||
|  |         ref, frame = capture.read() | ||||||
|  |         if not ref: | ||||||
|  |             raise ValueError("未能正确读取摄像头(视频),请注意是否正确安装摄像头(是否正确填写视频路径)。") | ||||||
|  | 
 | ||||||
|  |         fps = 0.0 | ||||||
|  |         while True: | ||||||
|  |             t1 = time.time() | ||||||
|  |             # read one frame | ||||||
|  |             ref, frame = capture.read() | ||||||
|  |             if not ref: | ||||||
|  |                 break | ||||||
|  |             # convert BGR to RGB | ||||||
|  |             frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) | ||||||
|  |             # convert to a PIL Image | ||||||
|  |             frame = Image.fromarray(np.uint8(frame)) | ||||||
|  |             # run detection | ||||||
|  |             frame = np.array(yolo.detect_image(frame)) | ||||||
|  |             # convert RGB back to BGR for OpenCV display | ||||||
|  |             frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) | ||||||
|  | 
 | ||||||
|  |             fps = (fps + (1. / (time.time() - t1))) / 2 | ||||||
|  |             print("fps= %.2f" % (fps)) | ||||||
|  |             frame = cv2.putText(frame, "fps= %.2f" % (fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) | ||||||
|  | 
 | ||||||
|  |             cv2.imshow("video", frame) | ||||||
|  |             c = cv2.waitKey(1) & 0xff | ||||||
|  |             if video_save_path != "": | ||||||
|  |                 out.write(frame) | ||||||
|  | 
 | ||||||
|  |             if c == 27: | ||||||
|  |                 capture.release() | ||||||
|  |                 break | ||||||
|  | 
 | ||||||
|  |         print("Video Detection Done!") | ||||||
|  |         capture.release() | ||||||
|  |         if video_save_path != "": | ||||||
|  |             print("Save processed video to the path :" + video_save_path) | ||||||
|  |             out.release() | ||||||
|  |         cv2.destroyAllWindows() | ||||||
|  | 
 | ||||||
|  |     elif mode == "fps": | ||||||
|  |         img = Image.open(fps_image_path) | ||||||
|  |         tact_time = yolo.get_FPS(img, test_interval) | ||||||
|  |         print(str(tact_time) + ' seconds, ' + str(1 / tact_time) + ' FPS, @batch_size 1') | ||||||
|  | 
 | ||||||
|  |     elif mode == "dir_predict": | ||||||
|  |         import os | ||||||
|  | 
 | ||||||
|  |         from tqdm import tqdm | ||||||
|  | 
 | ||||||
|  |         img_names = os.listdir(dir_origin_path) | ||||||
|  |         for img_name in tqdm(img_names): | ||||||
|  |             if img_name.lower().endswith( | ||||||
|  |                     ('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')): | ||||||
|  |                 image_path = os.path.join(dir_origin_path, img_name) | ||||||
|  |                 image = Image.open(image_path) | ||||||
|  |                 r_image = yolo.detect_image(image) | ||||||
|  |                 if not os.path.exists(dir_save_path): | ||||||
|  |                     os.makedirs(dir_save_path) | ||||||
|  |                 r_image.save(os.path.join(dir_save_path, img_name.replace(".jpg", ".png")), quality=95, subsampling=0) | ||||||
|  | 
 | ||||||
|  |     elif mode == "heatmap": | ||||||
|  |         while True: | ||||||
|  |             img = input('Input image filename:') | ||||||
|  |             try: | ||||||
|  |                 image = Image.open(img) | ||||||
|  |             except: | ||||||
|  |                 print('Open Error! Try again!') | ||||||
|  |                 continue | ||||||
|  |             else: | ||||||
|  |                 yolo.detect_heatmap(image, heatmap_save_path) | ||||||
|  | 
 | ||||||
|  |     elif mode == "export_onnx": | ||||||
|  |         yolo.convert_to_onnx(simplify, onnx_save_path) | ||||||
|  | 
 | ||||||
|  |     else: | ||||||
|  |         raise AssertionError( | ||||||
|  |             "Please specify the correct mode: 'predict', 'video', 'fps', 'heatmap', 'export_onnx', 'dir_predict'.") | ||||||
							
								
								
									
predict_with_windows.py (new file, 109 lines)
							| @ -0,0 +1,109 @@ | |||||||
|  | import time | ||||||
|  | 
 | ||||||
|  | import pyautogui | ||||||
|  | import cv2 | ||||||
|  | import numpy as np | ||||||
|  | from PIL import Image | ||||||
|  | 
 | ||||||
|  | from yolo import YOLO | ||||||
|  | 
 | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     yolo = YOLO() | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     #   mode selects the test mode: | ||||||
|  |     #   'predict'           single-image prediction; to modify the flow (save images, crop objects, ...) read the detailed comments below first | ||||||
|  |     #   'video'             video detection from a camera or a video file, see the comments below | ||||||
|  |     #   'fps'               FPS test, using img/street.jpg, see the comments below | ||||||
|  |     #   'dir_predict'       sweep a folder and save the detections; by default reads img/ and saves to img_out/, see the comments below | ||||||
|  |     #   'heatmap'           heat-map visualisation of the prediction results, see the comments below | ||||||
|  |     #   'export_onnx'       export the model to ONNX; requires pytorch 1.7.1 or newer | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     mode = "predict" | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     #   crop                whether to crop the detected objects after single-image prediction | ||||||
|  |     #   count               whether to count the detected objects | ||||||
|  |     #   crop and count only take effect when mode='predict' | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     crop = False | ||||||
|  |     count = False | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     #   video_path          path of the video; video_path=0 means use the camera | ||||||
|  |     #                       to detect a video file, set e.g. video_path = "xxx.mp4" to read xxx.mp4 from the root directory | ||||||
|  |     #   video_save_path     where to save the video; video_save_path="" means do not save | ||||||
|  |     #                       to save, set e.g. video_save_path = "yyy.mp4" to write yyy.mp4 in the root directory | ||||||
|  |     #   video_fps           fps of the saved video | ||||||
|  |     # | ||||||
|  |     #   video_path, video_save_path and video_fps only take effect when mode='video' | ||||||
|  |     #   to finish saving properly, exit with ctrl+c or let the video run to its last frame | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     video_path = 0 | ||||||
|  |     video_save_path = "" | ||||||
|  |     video_fps = 25.0 | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     #   test_interval       how many detections to run when measuring FPS; in theory, a larger test_interval gives a more accurate FPS | ||||||
|  |     #   fps_image_path      the image used for the FPS test | ||||||
|  |     # | ||||||
|  |     #   test_interval and fps_image_path only take effect when mode='fps' | ||||||
|  |     # ----------------------------------------------------------------------------------------------------------# | ||||||
|  |     test_interval = 100 | ||||||
|  |     fps_image_path = "img/street.jpg" | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     #   dir_origin_path     folder holding the images to detect | ||||||
|  |     #   dir_save_path       folder where the detected images are saved | ||||||
|  |     # | ||||||
|  |     #   dir_origin_path and dir_save_path only take effect when mode='dir_predict' | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     dir_origin_path = "img/" | ||||||
|  |     dir_save_path = "img_out/" | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     #   heatmap_save_path   where the heat map is saved, under model_data by default | ||||||
|  |     # | ||||||
|  |     #   heatmap_save_path only takes effect when mode='heatmap' | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     heatmap_save_path = "model_data/heatmap_vision.png" | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     #   simplify            run onnx-simplifier on the exported model | ||||||
|  |     #   onnx_save_path      where the onnx model is saved | ||||||
|  |     # -------------------------------------------------------------------------# | ||||||
|  |     simplify = True | ||||||
|  |     onnx_save_path = "model_data/models.onnx" | ||||||
|  | 
 | ||||||
|  |     if mode == "predict": | ||||||
|  |         ''' | ||||||
|  |         1. To save the detected image, call r_image.save("img.jpg"); edit predict.py directly. | ||||||
|  |         2. To get the coordinates of the predicted boxes, go into yolo.detect_image and read top, left, bottom, right in the drawing section. | ||||||
|  |         3. To crop out the detected objects, go into yolo.detect_image and slice the original image with the top, left, bottom, right values obtained in the drawing section. | ||||||
|  |         4. To write extra text on the output image, e.g. the count of a specific class, go into yolo.detect_image, test predicted_class in the drawing section | ||||||
|  |         (e.g. if predicted_class == 'car':), keep a counter, and draw the text with draw.text. | ||||||
|  |         ''' | ||||||
|  |         while True: | ||||||
|  |             # img = pyautogui.screenshot(region=[300, 50, 200, 100])  # region: top-left x, y, then width, height | ||||||
|  |             # img = pyautogui.screenshot() | ||||||
|  |             # Convert the grabbed image to an array, then swap RGB to BGR: | ||||||
|  |             # imshow expects BGR by default while pyautogui returns RGB, so convert or the colours will be off | ||||||
|  |             # img = cv2.cvtColor(np.asarray(img), cv2.COLOR_RGB2BGR) | ||||||
|  |             # img/street.jpg | ||||||
|  |             # img/street_a3.jpg | ||||||
|  |             try: | ||||||
|  |                 time.sleep(0.3) | ||||||
|  |                 # image = Image.fromarray(np.asarray(pyautogui.screenshot(region=[1920/2, 300, 1920/2, 1080]))) | ||||||
|  |                 image = Image.fromarray(np.asarray(pyautogui.screenshot())) | ||||||
|  |             except: | ||||||
|  |                 print('Open Error! Try again!') | ||||||
|  |                 continue | ||||||
|  |             else: | ||||||
|  |                 r_image = yolo.detect_image(image, crop=crop, count=count) | ||||||
|  |                 img = cv2.cvtColor(np.asarray(r_image), cv2.COLOR_RGB2BGR) | ||||||
|  |                 # img = cv2.resize(img, dsize=(1600, 860))  # (width, height) | ||||||
|  |                 img = cv2.resize(img, dsize=(1920, 1080))  # (width, height) | ||||||
|  |                 cv2.imshow("screen", img) | ||||||
|  |                 # time.sleep(1) | ||||||
|  |                 cv2.waitKey(1) | ||||||
|  | 
 | ||||||
|  |                 c = cv2.waitKey(1) & 0xff | ||||||
|  |                 # print(c) | ||||||
|  |                 if c == 113: | ||||||
|  |                     break | ||||||
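As the comments above note, pyautogui returns RGB while cv2.imshow expects BGR. A minimal stand-alone sketch of just that conversion, outside the detection loop (illustrative, not part of this script):

```python
import cv2
import numpy as np
import pyautogui

shot = pyautogui.screenshot()                              # RGB PIL image
frame = cv2.cvtColor(np.asarray(shot), cv2.COLOR_RGB2BGR)  # swap to BGR for OpenCV display
cv2.imshow("screen", frame)
cv2.waitKey(0)
cv2.destroyAllWindows()
```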
|  | 
 | ||||||
|  | 
 | ||||||
							
								
								
									
requirements.txt (new file, 9 lines)
							| @ -0,0 +1,9 @@ | |||||||
|  | scipy==1.2.1 | ||||||
|  | numpy==1.17.0 | ||||||
|  | matplotlib==3.1.2 | ||||||
|  | opencv_python==4.1.2.30 | ||||||
|  | torch==1.2.0 | ||||||
|  | torchvision==0.4.0 | ||||||
|  | tqdm==4.60.0 | ||||||
|  | Pillow==8.2.0 | ||||||
|  | h5py==2.10.0 | ||||||
							
								
								
									
summary.py (new file, 34 lines)
							| @ -0,0 +1,34 @@ | |||||||
|  | # --------------------------------------------# | ||||||
|  | #   This script prints the network structure | ||||||
|  | # --------------------------------------------# | ||||||
|  | import torch | ||||||
|  | # from thop import clever_format, profile | ||||||
|  | from torchsummary import summary | ||||||
|  | 
 | ||||||
|  | from nets.yolo import YoloBody | ||||||
|  | 
 | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     input_shape = [416, 416] | ||||||
|  |     anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] | ||||||
|  |     num_classes = 80 | ||||||
|  | 
 | ||||||
|  |     device = torch.device("cuda" if torch.cuda.is_available() else "cpu") | ||||||
|  |     m = YoloBody(anchors_mask, num_classes) | ||||||
|  |     print(m) | ||||||
|  |     print('-' * 80) | ||||||
|  | 
 | ||||||
|  |     m = m.to(device) | ||||||
|  |     summary(m, (3, input_shape[0], input_shape[1])) | ||||||
|  | 
 | ||||||
|  |     # dummy_input = torch.randn(1, 3, input_shape[0], input_shape[1]).to(device) | ||||||
|  |     # flops, params = profile(m.to(device), (dummy_input,), verbose=False) | ||||||
|  |     # --------------------------------------------------------# | ||||||
|  |     #   flops * 2 because profile does not count a convolution as two operations. | ||||||
|  |     #   Some papers count a convolution as a multiply and an add: multiply by 2. | ||||||
|  |     #   Others count only multiplications and ignore additions: no factor of 2. | ||||||
|  |     #   This code multiplies by 2, following YOLOX. | ||||||
|  |     # --------------------------------------------------------# | ||||||
|  |     # flops = flops * 2 | ||||||
|  |     # flops, params = clever_format([flops, params], "%.3f") | ||||||
|  |     # print('Total GFLOPS: %s' % (flops)) | ||||||
|  |     # print('Total params: %s' % (params)) | ||||||
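A sketch of the commented-out GFLOPS computation above, assuming the optional thop package is installed (pip install thop); the factor of 2 follows the convention described in the comment:

```python
import torch
from thop import clever_format, profile

from nets.yolo import YoloBody

m = YoloBody([[6, 7, 8], [3, 4, 5], [0, 1, 2]], 80)
dummy_input = torch.randn(1, 3, 416, 416)
flops, params = profile(m, (dummy_input,), verbose=False)
flops = flops * 2  # count multiply and add as separate operations, following YOLOX
flops, params = clever_format([flops, params], "%.3f")
print('Total GFLOPS: %s' % flops)
print('Total params: %s' % params)
```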
							
								
								
									
train_patch.py (new file, 225 lines)
							| @ -0,0 +1,225 @@ | |||||||
|  | """ | ||||||
|  | Training code for Adversarial patch training | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | import PIL | ||||||
|  | from torch.utils.tensorboard import SummaryWriter | ||||||
|  | 
 | ||||||
|  | # import load_data | ||||||
|  | from tqdm import tqdm | ||||||
|  | 
 | ||||||
|  | from load_data import *  # may cause duplicate-import problems? | ||||||
|  | import gc | ||||||
|  | import matplotlib.pyplot as plt | ||||||
|  | from torch import autograd | ||||||
|  | from torchvision import transforms | ||||||
|  | 
 | ||||||
|  | import subprocess | ||||||
|  | 
 | ||||||
|  | import patch_config | ||||||
|  | import sys | ||||||
|  | import time | ||||||
|  | 
 | ||||||
|  | from yolo import YOLO | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class PatchTrainer(object): | ||||||
|  |     def __init__(self, mode): | ||||||
|  |         self.config = patch_config.patch_configs[mode]()  # look up the matching config class | ||||||
|  | 
 | ||||||
|  |         # self.darknet_model = Darknet(self.config.cfgfile)  # load the yolo model | ||||||
|  |         # self.darknet_model.load_weights(self.config.weightfile)  # default YOLOv2 MS COCO weights; person is class 0 | ||||||
|  |         self.darknet_model = YOLO().net | ||||||
|  |         self.darknet_model = self.darknet_model.eval().cuda()  # TODO: Why eval? | ||||||
|  |         self.patch_applier = PatchApplier().cuda()  # applies the adversarial patch to the image | ||||||
|  |         self.patch_transformer = PatchTransformer().cuda()  # scales the patch to the target size and adds jitter | ||||||
|  |         # self.prob_extractor = MaxProbExtractor(0, 80, self.config).cuda()  # extracts the maximum class probability | ||||||
|  |         self.prob_extractor = MaxProbExtractor(0, 1, self.config).cuda()  # extracts the maximum class probability | ||||||
|  |         self.nps_calculator = NPSCalculator(self.config.printfile, self.config.patch_size).cuda()  # non-printability score | ||||||
|  |         self.total_variation = TotalVariation().cuda()  # total variation of the patch | ||||||
|  | 
 | ||||||
|  |         self.writer = self.init_tensorboard(mode) | ||||||
|  | 
 | ||||||
|  |     def init_tensorboard(self, name=None): | ||||||
|  |         subprocess.Popen(['tensorboard', '--logdir=runs']) | ||||||
|  |         if name is not None: | ||||||
|  |             time_str = time.strftime("%Y%m%d-%H%M%S") | ||||||
|  |             return SummaryWriter(f'runs/{time_str}_{name}') | ||||||
|  |         else: | ||||||
|  |             return SummaryWriter() | ||||||
|  | 
 | ||||||
|  |     def train(self): | ||||||
|  |         """ | ||||||
|  |         Optimize a patch to generate an adversarial example. | ||||||
|  |         :return: Nothing | ||||||
|  |         """ | ||||||
|  | 
 | ||||||
|  |         img_size = self.darknet_model.height  # 416 | ||||||
|  |         # print('batch_size:',batch_size) | ||||||
|  |         batch_size = self.config.batch_size  # 8 | ||||||
|  |         n_epochs = 200 | ||||||
|  |         # n_epochs = 5 | ||||||
|  |         # max_lab = 20  # maximum number of labels per image | ||||||
|  |         max_lab = 8 | ||||||
|  | 
 | ||||||
|  |         time_str = time.strftime("%Y%m%d-%H%M%S") | ||||||
|  | 
 | ||||||
|  |         # Generate starting point | ||||||
|  |         # adv_patch_cpu = self.generate_patch("gray")  # generate a gray patch initialised to 0.5 | ||||||
|  |         adv_patch_cpu = self.read_image("saved_patches/patchnew0.jpg") | ||||||
|  | 
 | ||||||
|  |         adv_patch_cpu.requires_grad_(True) | ||||||
|  | 
 | ||||||
|  |         train_loader = torch.utils.data.DataLoader( | ||||||
|  |             InriaDataset(self.config.img_dir, self.config.lab_dir, max_lab, img_size, | ||||||
|  |                          shuffle=True), | ||||||
|  |             batch_size=batch_size, | ||||||
|  |             shuffle=True, | ||||||
|  |             num_workers=0)  # does this, combined with `from load_data import *`, cause duplicate imports? | ||||||
|  |         self.epoch_length = len(train_loader) | ||||||
|  |         print(f'One epoch is {len(train_loader)}') | ||||||
|  | 
 | ||||||
|  |         optimizer = optim.Adam([adv_patch_cpu], lr=self.config.start_learning_rate, amsgrad=True)  # the patch itself is the parameter being optimised; amsgrad is the Adam fix from the ICLR 2018 best paper | ||||||
|  |         scheduler = self.config.scheduler_factory(optimizer) | ||||||
|  | 
 | ||||||
|  |         et0 = time.time() | ||||||
|  |         for epoch in range(n_epochs): | ||||||
|  |             ep_det_loss = 0 | ||||||
|  |             ep_nps_loss = 0 | ||||||
|  |             ep_tv_loss = 0 | ||||||
|  |             ep_loss = 0 | ||||||
|  |             bt0 = time.time() | ||||||
|  |             for i_batch, (img_batch, lab_batch) in tqdm(enumerate(train_loader), desc=f'Running epoch {epoch}', | ||||||
|  |                                                         total=self.epoch_length): | ||||||
|  |                 with autograd.detect_anomaly():  # 1. prints the forward-op stack that caused a failing backward pass  2. raises an error when backward produces NaN | ||||||
|  |                     img_batch = img_batch.cuda()  # 8, 3, 416, 416 | ||||||
|  |                     lab_batch = lab_batch.cuda()  # 8, 14, 5  why are the person labels padded to 14? | ||||||
|  |                     # print('TRAINING EPOCH %i, BATCH %i'%(epoch, i_batch)) | ||||||
|  |                     adv_patch = adv_patch_cpu.cuda()  # 3, 300, 300 | ||||||
|  |                     adv_batch_t = self.patch_transformer(adv_patch, lab_batch, img_size, do_rotate=True, rand_loc=False) | ||||||
|  |                     p_img_batch = self.patch_applier(img_batch, adv_batch_t) | ||||||
|  |                     p_img_batch = F.interpolate(p_img_batch, | ||||||
|  |                                                 (self.darknet_model.height, self.darknet_model.width))  # make sure it matches the image size | ||||||
|  |                      | ||||||
|  |                     # print('++++++++++++p_img_batch:+++++++++++++',p_img_batch.shape) | ||||||
|  |                     img = p_img_batch[1, :, :, ] | ||||||
|  |                     img = transforms.ToPILImage()(img.detach().cpu()) | ||||||
|  |                     # img.show() | ||||||
|  | 
 | ||||||
|  |                     outputs = self.darknet_model(p_img_batch)  # input 8,3,416,416; output e.g. 8,425,13,13 where 425 = 5*(5+80) | ||||||
|  |                     max_prob = 0 | ||||||
|  |                     nps = 0 | ||||||
|  |                     tv = 0 | ||||||
|  |                     for l in range(len(outputs)):  # computed separately for the three output feature maps of different resolutions | ||||||
|  |                         output = outputs[l] | ||||||
|  |                         max_prob += self.prob_extractor(output) | ||||||
|  |                         nps += self.nps_calculator(adv_patch) | ||||||
|  |                         tv += self.total_variation(adv_patch) | ||||||
|  | 
 | ||||||
|  |                     nps_loss = nps * 0.01 | ||||||
|  |                     tv_loss = tv * 2.5 | ||||||
|  |                     det_loss = torch.mean(max_prob)  # use the person confidence as the loss | ||||||
|  |                     loss = det_loss + nps_loss + torch.max(tv_loss, torch.tensor(0.1).cuda()) | ||||||
|  | 
 | ||||||
|  |                     ep_det_loss += det_loss.detach().cpu().numpy() | ||||||
|  |                     ep_nps_loss += nps_loss.detach().cpu().numpy() | ||||||
|  |                     ep_tv_loss += tv_loss.detach().cpu().numpy() | ||||||
|  |                     ep_loss += loss | ||||||
|  | 
 | ||||||
|  |                     loss.backward() | ||||||
|  |                     optimizer.step() | ||||||
|  |                     optimizer.zero_grad() | ||||||
|  |                     adv_patch_cpu.data.clamp_(0, 1)  # keep patch in image range | ||||||
|  | 
 | ||||||
|  |                     bt1 = time.time() | ||||||
|  |                     if i_batch % 5 == 0: | ||||||
|  |                         iteration = self.epoch_length * epoch + i_batch | ||||||
|  | 
 | ||||||
|  |                         self.writer.add_scalar('total_loss', loss.detach().cpu().numpy(), iteration) | ||||||
|  |                         self.writer.add_scalar('loss/det_loss', det_loss.detach().cpu().numpy(), iteration) | ||||||
|  |                         self.writer.add_scalar('loss/nps_loss', nps_loss.detach().cpu().numpy(), iteration) | ||||||
|  |                         self.writer.add_scalar('loss/tv_loss', tv_loss.detach().cpu().numpy(), iteration) | ||||||
|  |                         self.writer.add_scalar('misc/epoch', epoch, iteration) | ||||||
|  |                         self.writer.add_scalar('misc/learning_rate', optimizer.param_groups[0]["lr"], iteration) | ||||||
|  | 
 | ||||||
|  |                         self.writer.add_image('patch', adv_patch_cpu, iteration) | ||||||
|  |                     if i_batch + 1 >= len(train_loader): | ||||||
|  |                         print('\n') | ||||||
|  |                     else: | ||||||
|  |                         del adv_batch_t, output, max_prob, det_loss, p_img_batch, nps_loss, tv_loss, loss | ||||||
|  |                         torch.cuda.empty_cache() | ||||||
|  |                     bt0 = time.time() | ||||||
|  |             et1 = time.time() | ||||||
|  |             ep_det_loss = ep_det_loss / len(train_loader) | ||||||
|  |             ep_nps_loss = ep_nps_loss / len(train_loader) | ||||||
|  |             ep_tv_loss = ep_tv_loss / len(train_loader) | ||||||
|  |             ep_loss = ep_loss / len(train_loader) | ||||||
|  | 
 | ||||||
|  |             # im = transforms.ToPILImage('RGB')(adv_patch_cpu) | ||||||
|  |             # plt.imshow(im) | ||||||
|  |             # plt.savefig(f'pics/{time_str}_{self.config.patch_name}_{epoch}.png') | ||||||
|  | 
 | ||||||
|  |             scheduler.step(ep_loss) | ||||||
|  |             if True: | ||||||
|  |                 print('  EPOCH NR: ', epoch) | ||||||
|  |                 print('EPOCH LOSS: ', ep_loss) | ||||||
|  |                 print('  DET LOSS: ', ep_det_loss) | ||||||
|  |                 print('  NPS LOSS: ', ep_nps_loss) | ||||||
|  |                 print('   TV LOSS: ', ep_tv_loss) | ||||||
|  |                 print('EPOCH TIME: ', et1 - et0) | ||||||
|  |                 # im = transforms.ToPILImage('RGB')(adv_patch_cpu) | ||||||
|  |                 # plt.imshow(im) | ||||||
|  |                 # plt.show() | ||||||
|  |                 # im.save("saved_patches/patchnew1.jpg") | ||||||
|  |                 im = transforms.ToPILImage('RGB')(adv_patch_cpu) | ||||||
|  |                 if epoch >= 3: | ||||||
|  |                     im.save(f"saved_patches/patchnew1_t1_{epoch}_{time_str}.jpg") | ||||||
|  |                 del adv_batch_t, output, max_prob, det_loss, p_img_batch, nps_loss, tv_loss, loss | ||||||
|  |                 torch.cuda.empty_cache() | ||||||
|  |             et0 = time.time() | ||||||
|  | 
 | ||||||
|  |     def generate_patch(self, type): | ||||||
|  |         """ | ||||||
|  |         Generate a random patch as a starting point for optimization. | ||||||
|  | 
 | ||||||
|  |         :param type: Can be 'gray' or 'random'. Whether to generate a gray or a random patch. | ||||||
|  |         :return: | ||||||
|  |         """ | ||||||
|  |         if type == 'gray': | ||||||
|  |             adv_patch_cpu = torch.full((3, self.config.patch_size, self.config.patch_size), 0.5) | ||||||
|  |         elif type == 'random': | ||||||
|  |             adv_patch_cpu = torch.rand((3, self.config.patch_size, self.config.patch_size)) | ||||||
|  |         else: | ||||||
|  |             raise ValueError("type must be 'gray' or 'random'")  # avoid silently returning an undefined patch | ||||||
|  | 
 | ||||||
|  |         return adv_patch_cpu | ||||||
|  | 
 | ||||||
|  |     def read_image(self, path): | ||||||
|  |         """ | ||||||
|  |         Read an input image to be used as a patch | ||||||
|  | 
 | ||||||
|  |         :param path: Path to the image to be read. | ||||||
|  |         :return: Returns the transformed patch as a pytorch Tensor. | ||||||
|  |         """ | ||||||
|  |         patch_img = Image.open(path).convert('RGB') | ||||||
|  |         tf = transforms.Resize((self.config.patch_size, self.config.patch_size)) | ||||||
|  |         patch_img = tf(patch_img) | ||||||
|  |         tf = transforms.ToTensor() | ||||||
|  | 
 | ||||||
|  |         adv_patch_cpu = tf(patch_img) | ||||||
|  |         return adv_patch_cpu | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def main(): | ||||||
|  |     if len(sys.argv) != 2: | ||||||
|  |         print('You need to supply (only) a configuration mode.') | ||||||
|  |         print('Possible modes are:') | ||||||
|  |         print(patch_config.patch_configs)  # usually paper_obj is passed | ||||||
|  |         sys.exit(1)  # exit instead of falling through to an IndexError | ||||||
|  |  | ||||||
|  |     # print('sys.argv:',sys.argv) | ||||||
|  |     trainer = PatchTrainer(sys.argv[1]) | ||||||
|  |     trainer.train() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | if __name__ == '__main__': | ||||||
|  |     main() | ||||||
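A usage sketch; "paper_obj" is one of the keys in patch_config.patch_configs, and the cfg/weights/dataset paths referenced by BaseConfig are assumed to exist:

```python
# Equivalent to running: python train_patch.py paper_obj
from train_patch import PatchTrainer

trainer = PatchTrainer("paper_obj")  # key into patch_config.patch_configs
trainer.train()
```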
							
								
								
									
utils/__init__.py (new file, 1 line)
							| @ -0,0 +1 @@ | |||||||
|  | # | ||||||
							
								
								
									
utils/callbacks.py (new file, 241 lines)
							| @ -0,0 +1,241 @@ | |||||||
|  | import datetime | ||||||
|  | import os | ||||||
|  | 
 | ||||||
|  | import torch | ||||||
|  | import matplotlib | ||||||
|  | 
 | ||||||
|  | import scipy.signal | ||||||
|  | from matplotlib import pyplot as plt | ||||||
|  | from torch.utils.tensorboard import SummaryWriter | ||||||
|  | 
 | ||||||
|  | import shutil | ||||||
|  | import numpy as np | ||||||
|  | 
 | ||||||
|  | from PIL import Image | ||||||
|  | from tqdm import tqdm | ||||||
|  | from .utils import cvtColor, preprocess_input, resize_image | ||||||
|  | from .utils_bbox import DecodeBox | ||||||
|  | from .utils_map import get_coco_map, get_map | ||||||
|  | 
 | ||||||
|  | matplotlib.use('Agg') | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class LossHistory(): | ||||||
|  |     def __init__(self, log_dir, model, input_shape): | ||||||
|  |         self.log_dir = log_dir | ||||||
|  |         self.losses = [] | ||||||
|  |         self.val_loss = [] | ||||||
|  | 
 | ||||||
|  |         os.makedirs(self.log_dir) | ||||||
|  |         self.writer = SummaryWriter(self.log_dir) | ||||||
|  |         try: | ||||||
|  |             dummy_input = torch.randn(2, 3, input_shape[0], input_shape[1]) | ||||||
|  |             self.writer.add_graph(model, dummy_input) | ||||||
|  |         except: | ||||||
|  |             pass | ||||||
|  | 
 | ||||||
|  |     def append_loss(self, epoch, loss, val_loss): | ||||||
|  |         if not os.path.exists(self.log_dir): | ||||||
|  |             os.makedirs(self.log_dir) | ||||||
|  | 
 | ||||||
|  |         self.losses.append(loss) | ||||||
|  |         self.val_loss.append(val_loss) | ||||||
|  | 
 | ||||||
|  |         with open(os.path.join(self.log_dir, "epoch_loss.txt"), 'a') as f: | ||||||
|  |             f.write(str(loss)) | ||||||
|  |             f.write("\n") | ||||||
|  |         with open(os.path.join(self.log_dir, "epoch_val_loss.txt"), 'a') as f: | ||||||
|  |             f.write(str(val_loss)) | ||||||
|  |             f.write("\n") | ||||||
|  | 
 | ||||||
|  |         self.writer.add_scalar('loss', loss, epoch) | ||||||
|  |         self.writer.add_scalar('val_loss', val_loss, epoch) | ||||||
|  |         self.loss_plot() | ||||||
|  | 
 | ||||||
|  |     def loss_plot(self): | ||||||
|  |         iters = range(len(self.losses)) | ||||||
|  | 
 | ||||||
|  |         plt.figure() | ||||||
|  |         plt.plot(iters, self.losses, 'red', linewidth=2, label='train loss') | ||||||
|  |         plt.plot(iters, self.val_loss, 'coral', linewidth=2, label='val loss') | ||||||
|  |         try: | ||||||
|  |             if len(self.losses) < 25: | ||||||
|  |                 num = 5 | ||||||
|  |             else: | ||||||
|  |                 num = 15 | ||||||
|  | 
 | ||||||
|  |             plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle='--', linewidth=2, | ||||||
|  |                      label='smooth train loss') | ||||||
|  |             plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle='--', linewidth=2, | ||||||
|  |                      label='smooth val loss') | ||||||
|  |         except: | ||||||
|  |             pass | ||||||
|  | 
 | ||||||
|  |         plt.grid(True) | ||||||
|  |         plt.xlabel('Epoch') | ||||||
|  |         plt.ylabel('Loss') | ||||||
|  |         plt.legend(loc="upper right") | ||||||
|  | 
 | ||||||
|  |         plt.savefig(os.path.join(self.log_dir, "epoch_loss.png")) | ||||||
|  | 
 | ||||||
|  |         plt.cla() | ||||||
|  |         plt.close("all") | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class EvalCallback(): | ||||||
|  |     def __init__(self, net, input_shape, anchors, anchors_mask, class_names, num_classes, val_lines, log_dir, cuda, \ | ||||||
|  |                  map_out_path=".temp_map_out", max_boxes=100, confidence=0.05, nms_iou=0.5, letterbox_image=True, | ||||||
|  |                  MINOVERLAP=0.5, eval_flag=True, period=1): | ||||||
|  |         super(EvalCallback, self).__init__() | ||||||
|  | 
 | ||||||
|  |         self.net = net | ||||||
|  |         self.input_shape = input_shape | ||||||
|  |         self.anchors = anchors | ||||||
|  |         self.anchors_mask = anchors_mask | ||||||
|  |         self.class_names = class_names | ||||||
|  |         self.num_classes = num_classes | ||||||
|  |         self.val_lines = val_lines | ||||||
|  |         self.log_dir = log_dir | ||||||
|  |         self.cuda = cuda | ||||||
|  |         self.map_out_path = map_out_path | ||||||
|  |         self.max_boxes = max_boxes | ||||||
|  |         self.confidence = confidence | ||||||
|  |         self.nms_iou = nms_iou | ||||||
|  |         self.letterbox_image = letterbox_image | ||||||
|  |         self.MINOVERLAP = MINOVERLAP | ||||||
|  |         self.eval_flag = eval_flag | ||||||
|  |         self.period = period | ||||||
|  | 
 | ||||||
|  |         self.bbox_util = DecodeBox(self.anchors, self.num_classes, (self.input_shape[0], self.input_shape[1]), | ||||||
|  |                                    self.anchors_mask) | ||||||
|  | 
 | ||||||
|  |         self.maps = [0] | ||||||
|  |         self.epoches = [0] | ||||||
|  |         if self.eval_flag: | ||||||
|  |             with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f: | ||||||
|  |                 f.write(str(0)) | ||||||
|  |                 f.write("\n") | ||||||
|  | 
 | ||||||
|  |     def get_map_txt(self, image_id, image, class_names, map_out_path): | ||||||
|  |         f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w", encoding='utf-8') | ||||||
|  |         image_shape = np.array(np.shape(image)[0:2]) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Convert the image to RGB here so grayscale images do not error out at prediction time. | ||||||
|  |         #   The code only supports RGB prediction; every other image type is converted to RGB. | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image = cvtColor(image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Pad the image with gray bars for a distortion-free resize | ||||||
|  |         #   (a plain resize can also be used for detection) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Add the batch_size dimension | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) | ||||||
|  | 
 | ||||||
|  |         with torch.no_grad(): | ||||||
|  |             images = torch.from_numpy(image_data) | ||||||
|  |             if self.cuda: | ||||||
|  |                 images = images.cuda() | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             #   Feed the image into the network for prediction! | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             outputs = self.net(images) | ||||||
|  |             outputs = self.bbox_util.decode_box(outputs) | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             #   Stack the predicted boxes, then run non-maximum suppression | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, | ||||||
|  |                                                          image_shape, self.letterbox_image, conf_thres=self.confidence, | ||||||
|  |                                                          nms_thres=self.nms_iou) | ||||||
|  | 
 | ||||||
|  |             if results[0] is None: | ||||||
|  |                 return | ||||||
|  | 
 | ||||||
|  |             top_label = np.array(results[0][:, 6], dtype='int32') | ||||||
|  |             top_conf = results[0][:, 4] * results[0][:, 5] | ||||||
|  |             top_boxes = results[0][:, :4] | ||||||
|  | 
 | ||||||
|  |         top_100 = np.argsort(top_conf)[::-1][:self.max_boxes]  # keep the max_boxes highest-confidence detections (sort by confidence, not by label) | ||||||
|  |         top_boxes = top_boxes[top_100] | ||||||
|  |         top_conf = top_conf[top_100] | ||||||
|  |         top_label = top_label[top_100] | ||||||
|  | 
 | ||||||
|  |         for i, c in list(enumerate(top_label)): | ||||||
|  |             predicted_class = self.class_names[int(c)] | ||||||
|  |             box = top_boxes[i] | ||||||
|  |             score = str(top_conf[i]) | ||||||
|  | 
 | ||||||
|  |             top, left, bottom, right = box | ||||||
|  |             if predicted_class not in class_names: | ||||||
|  |                 continue | ||||||
|  | 
 | ||||||
|  |             f.write("%s %s %s %s %s %s\n" % ( | ||||||
|  |                 predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)), str(int(bottom)))) | ||||||
|  | 
 | ||||||
|  |         f.close() | ||||||
|  |         return | ||||||
|  | 
 | ||||||
|  |     def on_epoch_end(self, epoch, model_eval): | ||||||
|  |         if epoch % self.period == 0 and self.eval_flag: | ||||||
|  |             self.net = model_eval | ||||||
|  |             if not os.path.exists(self.map_out_path): | ||||||
|  |                 os.makedirs(self.map_out_path) | ||||||
|  |             if not os.path.exists(os.path.join(self.map_out_path, "ground-truth")): | ||||||
|  |                 os.makedirs(os.path.join(self.map_out_path, "ground-truth")) | ||||||
|  |             if not os.path.exists(os.path.join(self.map_out_path, "detection-results")): | ||||||
|  |                 os.makedirs(os.path.join(self.map_out_path, "detection-results")) | ||||||
|  |             print("Get map.") | ||||||
|  |             for annotation_line in tqdm(self.val_lines): | ||||||
|  |                 line = annotation_line.split() | ||||||
|  |                 image_id = os.path.basename(line[0]).split('.')[0] | ||||||
|  |                 # ------------------------------# | ||||||
|  |                 #   Read the image and convert it to RGB | ||||||
|  |                 # ------------------------------# | ||||||
|  |                 image = Image.open(line[0]) | ||||||
|  |                 # ------------------------------# | ||||||
|  |                 #   Get the ground-truth boxes | ||||||
|  |                 # ------------------------------# | ||||||
|  |                 gt_boxes = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]]) | ||||||
|  |                 # ------------------------------# | ||||||
|  |                 #   Write the prediction txt | ||||||
|  |                 # ------------------------------# | ||||||
|  |                 self.get_map_txt(image_id, image, self.class_names, self.map_out_path) | ||||||
|  | 
 | ||||||
|  |                 # ------------------------------# | ||||||
|  |                 #   Write the ground-truth txt | ||||||
|  |                 # ------------------------------# | ||||||
|  |                 with open(os.path.join(self.map_out_path, "ground-truth/" + image_id + ".txt"), "w") as new_f: | ||||||
|  |                     for box in gt_boxes: | ||||||
|  |                         left, top, right, bottom, obj = box | ||||||
|  |                         obj_name = self.class_names[obj] | ||||||
|  |                         new_f.write("%s %s %s %s %s\n" % (obj_name, left, top, right, bottom)) | ||||||
|  | 
 | ||||||
|  |             print("Calculate Map.") | ||||||
|  |             try: | ||||||
|  |                 temp_map = get_coco_map(class_names=self.class_names, path=self.map_out_path)[1] | ||||||
|  |             except: | ||||||
|  |                 temp_map = get_map(self.MINOVERLAP, False, path=self.map_out_path) | ||||||
|  |             self.maps.append(temp_map) | ||||||
|  |             self.epoches.append(epoch) | ||||||
|  | 
 | ||||||
|  |             with open(os.path.join(self.log_dir, "epoch_map.txt"), 'a') as f: | ||||||
|  |                 f.write(str(temp_map)) | ||||||
|  |                 f.write("\n") | ||||||
|  | 
 | ||||||
|  |             plt.figure() | ||||||
|  |             plt.plot(self.epoches, self.maps, 'red', linewidth=2, label='train map') | ||||||
|  | 
 | ||||||
|  |             plt.grid(True) | ||||||
|  |             plt.xlabel('Epoch') | ||||||
|  |             plt.ylabel('Map %s' % str(self.MINOVERLAP)) | ||||||
|  |             plt.title('A Map Curve') | ||||||
|  |             plt.legend(loc="upper right") | ||||||
|  | 
 | ||||||
|  |             plt.savefig(os.path.join(self.log_dir, "epoch_map.png")) | ||||||
|  |             plt.cla() | ||||||
|  |             plt.close("all") | ||||||
|  | 
 | ||||||
|  |             print("Get map done.") | ||||||
|  |             shutil.rmtree(self.map_out_path) | ||||||
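A hedged sketch of how LossHistory and EvalCallback might be wired into a training loop. Here model, anchors, class_names and val_lines are assumptions (built elsewhere in a real training script), not values defined by this file:

```python
import datetime
import os

import torch

from utils.callbacks import LossHistory, EvalCallback

# Assumed to be built elsewhere: model, anchors, class_names, val_lines.
log_dir = os.path.join('logs', datetime.datetime.now().strftime('loss_%Y_%m_%d_%H_%M_%S'))
loss_history = LossHistory(log_dir, model, input_shape=[416, 416])
eval_callback = EvalCallback(model, [416, 416], anchors, [[6, 7, 8], [3, 4, 5], [0, 1, 2]],
                             class_names, len(class_names), val_lines, log_dir,
                             cuda=torch.cuda.is_available())

for epoch in range(100):
    train_loss, val_loss = 0.0, 0.0  # placeholder: run one epoch of training / validation here
    loss_history.append_loss(epoch, train_loss, val_loss)
    eval_callback.on_epoch_end(epoch, model)  # writes epoch_map.txt / epoch_map.png
```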
							
								
								
									
utils/dataloader.py (new file, 170 lines)
							| @ -0,0 +1,170 @@ | |||||||
|  | import cv2 | ||||||
|  | import numpy as np | ||||||
|  | import torch | ||||||
|  | from PIL import Image | ||||||
|  | from torch.utils.data.dataset import Dataset | ||||||
|  | 
 | ||||||
|  | from utils.utils import cvtColor, preprocess_input | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class YoloDataset(Dataset): | ||||||
|  |     def __init__(self, annotation_lines, input_shape, num_classes, train): | ||||||
|  |         super(YoloDataset, self).__init__() | ||||||
|  |         self.annotation_lines = annotation_lines  # annotation lines of the train/val split; small enough to load in full | ||||||
|  |         self.input_shape = input_shape  # here [416, 416] | ||||||
|  |         self.num_classes = num_classes  # here 20 | ||||||
|  |         self.length = len(self.annotation_lines)  # number of samples | ||||||
|  |         self.train = train  # flag marking the training set | ||||||
|  | 
 | ||||||
|  |     def __len__(self): | ||||||
|  |         return self.length | ||||||
|  | 
 | ||||||
|  |     def __getitem__(self, index): | ||||||
|  |         index = index % self.length | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         #   Random data augmentation at training time, | ||||||
|  |         #   none at validation time | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         image, box = self.get_random_data(self.annotation_lines[index], self.input_shape[0:2], | ||||||
|  |                                           random=self.train)  # custom data augmentation | ||||||
|  |         image = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))  # normalise pixels to 0~1, then move channels first | ||||||
|  |         box = np.array(box, dtype=np.float32)  # convert to a fresh numpy array | ||||||
|  |         if len(box) != 0: | ||||||
|  |             box[:, [0, 2]] = box[:, [0, 2]] / self.input_shape[1]  # normalise the box coordinates | ||||||
|  |             box[:, [1, 3]] = box[:, [1, 3]] / self.input_shape[0] | ||||||
|  | 
 | ||||||
|  |             box[:, 2:4] = box[:, 2:4] - box[:, 0:2]  # dims 2,3 now hold width and height | ||||||
|  |             box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2  # dims 0,1 now hold the centre point | ||||||
|  |         return image, box | ||||||
|  | 
 | ||||||
|  |     def rand(self, a=0, b=1): | ||||||
|  |         return np.random.rand() * (b - a) + a | ||||||
|  | 
 | ||||||
|  |     def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=0.7, val=0.4, random=True): | ||||||
|  |         line = annotation_line.split()  # split on whitespace (spaces, newlines, ...) | ||||||
|  |         # ------------------------------# | ||||||
|  |         #   Read the image and convert it to RGB | ||||||
|  |         # ------------------------------# | ||||||
|  |         image = Image.open(line[0])  # line[0] is the image path | ||||||
|  |         image = cvtColor(image)  # no-op for images that are already RGB | ||||||
|  |         # ------------------------------# | ||||||
|  |         #   Get the image size and the target size | ||||||
|  |         # ------------------------------# | ||||||
|  |         iw, ih = image.size  # original image size | ||||||
|  |         h, w = input_shape | ||||||
|  |         # ------------------------------# | ||||||
|  |         #   Get the annotated boxes | ||||||
|  |         # ------------------------------# | ||||||
|  |         box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])  # from a python list of lists to a numpy 2-D array | ||||||
|  | 
 | ||||||
|  |         if not random:  # 没进入这里面 | ||||||
|  |             scale = min(w / iw, h / ih) | ||||||
|  |             nw = int(iw * scale) | ||||||
|  |             nh = int(ih * scale) | ||||||
|  |             dx = (w - nw) // 2 | ||||||
|  |             dy = (h - nh) // 2 | ||||||
|  | 
 | ||||||
|  |             # ---------------------------------# | ||||||
|  |             #   pad the leftover area with gray bars | ||||||
|  |             # ---------------------------------# | ||||||
|  |             image = image.resize((nw, nh), Image.BICUBIC) | ||||||
|  |             new_image = Image.new('RGB', (w, h), (128, 128, 128)) | ||||||
|  |             new_image.paste(image, (dx, dy)) | ||||||
|  |             image_data = np.array(new_image, np.float32) | ||||||
|  | 
 | ||||||
|  |             # ---------------------------------# | ||||||
|  |             #   adjust the ground-truth boxes | ||||||
|  |             # ---------------------------------# | ||||||
|  |             if len(box) > 0: | ||||||
|  |                 np.random.shuffle(box) | ||||||
|  |                 box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx | ||||||
|  |                 box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy | ||||||
|  |                 box[:, 0:2][box[:, 0:2] < 0] = 0 | ||||||
|  |                 box[:, 2][box[:, 2] > w] = w | ||||||
|  |                 box[:, 3][box[:, 3] > h] = h | ||||||
|  |                 box_w = box[:, 2] - box[:, 0] | ||||||
|  |                 box_h = box[:, 3] - box[:, 1] | ||||||
|  |                 box = box[np.logical_and(box_w > 1, box_h > 1)]  # discard invalid box | ||||||
|  | 
 | ||||||
|  |             return image_data, box | ||||||
|  | 
 | ||||||
|  |         # ------------------------------------------# | ||||||
|  |         #   rescale the image and jitter its aspect ratio | ||||||
|  |         # ------------------------------------------# | ||||||
|  |         new_ar = iw / ih * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)  # (iw * rand) / (ih * rand) | ||||||
|  |         scale = self.rand(.25, 2)  # random scale factor | ||||||
|  |         if new_ar < 1:  # jittered image is taller than wide | ||||||
|  |             nh = int(scale * h)  # scale the height first | ||||||
|  |             nw = int(nh * new_ar) | ||||||
|  |         else:  # jittered image is wider than tall | ||||||
|  |             nw = int(scale * w)  # new width = target width times the random scale | ||||||
|  |             nh = int(nw / new_ar)  # height chosen so the new aspect ratio is exactly new_ar | ||||||
|  |         image = image.resize((nw, nh), Image.BICUBIC) | ||||||
|  | 
 | ||||||
|  |         # ------------------------------------------# | ||||||
|  |         #   pad the leftover area with gray bars | ||||||
|  |         # ------------------------------------------# | ||||||
|  |         dx = int(self.rand(0, w - nw))  # random top-left corner in (0, w - nw) for pasting the new image | ||||||
|  |         dy = int(self.rand(0, h - nh)) | ||||||
|  |         new_image = Image.new('RGB', (w, h), (128, 128, 128))   # a gray canvas of size (w, h), e.g. 416x416 | ||||||
|  |         new_image.paste(image, (dx, dy))  # paste the resized image at the random offset | ||||||
|  |         image = new_image | ||||||
|  | 
 | ||||||
|  |         # ------------------------------------------# | ||||||
|  |         #   randomly flip the image | ||||||
|  |         # ------------------------------------------# | ||||||
|  |         flip = self.rand() < .5 | ||||||
|  |         if flip: | ||||||
|  |             image = image.transpose(Image.FLIP_LEFT_RIGHT) | ||||||
|  | 
 | ||||||
|  |         image_data = np.array(image, np.uint8) | ||||||
|  |         # ---------------------------------# | ||||||
|  |         #   color-space (HSV) augmentation: | ||||||
|  |         #   compute the random gain factors | ||||||
|  |         # ---------------------------------# | ||||||
|  |         r = np.random.uniform(-1, 1, 3) * [hue, sat, val] + 1 | ||||||
|  |         # ---------------------------------# | ||||||
|  |         #   convert the image to HSV | ||||||
|  |         # ---------------------------------# | ||||||
|  |         hue, sat, val = cv2.split(cv2.cvtColor(image_data, cv2.COLOR_RGB2HSV)) | ||||||
|  |         dtype = image_data.dtype | ||||||
|  |         # ---------------------------------# | ||||||
|  |         #   apply the transform | ||||||
|  |         # ---------------------------------# | ||||||
|  |         x = np.arange(0, 256, dtype=r.dtype) | ||||||
|  |         lut_hue = ((x * r[0]) % 180).astype(dtype) | ||||||
|  |         lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) | ||||||
|  |         lut_val = np.clip(x * r[2], 0, 255).astype(dtype) | ||||||
|  |         # LUT = look-up table; cv2.LUT(src, lut) maps every value of src through the table lut | ||||||
|  |         image_data = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))) | ||||||
|  |         image_data = cv2.cvtColor(image_data, cv2.COLOR_HSV2RGB)   # image_data is still uint8 here | ||||||
|  | 
 | ||||||
|  |         # ---------------------------------# | ||||||
|  |         #   adjust the ground-truth boxes | ||||||
|  |         # ---------------------------------# | ||||||
|  |         if len(box) > 0:  # if there are any boxes | ||||||
|  |             np.random.shuffle(box) | ||||||
|  |             box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx  # columns 0, 2 are x coords: divide by iw for the fraction of the original image, multiply by nw for the new size, add the paste offset dx | ||||||
|  |             box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy | ||||||
|  |             if flip: | ||||||
|  |                 box[:, [0, 2]] = w - box[:, [2, 0]]  # after a horizontal flip x becomes w - x, and x0/x1 swap roles | ||||||
|  |             box[:, 0:2][box[:, 0:2] < 0] = 0  # clip top-left coordinates outside the image (< 0) to 0  # couldn't the bottom-right point be < 0 too? | ||||||
|  |             box[:, 2][box[:, 2] > w] = w  # clip bottom-right x beyond the image to w   # couldn't the top-left exceed the image too? | ||||||
|  |             box[:, 3][box[:, 3] > h] = h  # clip bottom-right y beyond the image to h | ||||||
|  |             box_w = box[:, 2] - box[:, 0] | ||||||
|  |             box_h = box[:, 3] - box[:, 1] | ||||||
|  |             box = box[np.logical_and(box_w > 1, box_h > 1)]  # keep only boxes at least 1 px wide and tall; this drops fully clipped boxes, answering the questions above | ||||||
|  | 
 | ||||||
|  |         return image_data, box  # boxes are still in corner (top-left / bottom-right) format | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # used as the collate_fn of the DataLoader | ||||||
|  | def yolo_dataset_collate(batch): | ||||||
|  |     images = []  # batch is a list of (image_data, box) pairs; gather the images and the boxes separately | ||||||
|  |     bboxes = [] | ||||||
|  |     for img, box in batch: | ||||||
|  |         images.append(img)  # images are already float32 in 0~1 at this point | ||||||
|  |         bboxes.append(box) | ||||||
|  |     images = torch.from_numpy(np.array(images)).type(torch.FloatTensor)  # stack into a (batch_size, C, H, W) tensor | ||||||
|  |     bboxes = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in bboxes]  # a list of 2-D tensors, one per image (variable box count) | ||||||
|  |     return images, bboxes | ||||||
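A minimal sketch of how YoloDataset and yolo_dataset_collate plug into a torch DataLoader; the annotation file name, batch size and class count below are placeholder values, not taken from this commit:

    from torch.utils.data import DataLoader

    with open('2007_train.txt') as f:            # hypothetical annotation file, one image per line
        train_lines = f.readlines()

    train_dataset = YoloDataset(train_lines, input_shape=[416, 416], num_classes=20, train=True)
    gen = DataLoader(train_dataset, batch_size=8, shuffle=True,
                     collate_fn=yolo_dataset_collate)  # keeps boxes as a list of variable-length tensors

    for images, bboxes in gen:
        print(images.shape)  # torch.Size([8, 3, 416, 416])
        break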
							
								
								
									
79 utils/utils.py Normal file
							| @ -0,0 +1,79 @@ | |||||||
|  | import numpy as np | ||||||
|  | from PIL import Image | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # ---------------------------------------------------------# | ||||||
|  | #   convert the image to RGB to avoid errors on grayscale input at prediction time; | ||||||
|  | #   the code only supports RGB, so every other mode is converted | ||||||
|  | # ---------------------------------------------------------# | ||||||
|  | def cvtColor(image): | ||||||
|  |     if len(np.shape(image)) == 3 and np.shape(image)[2] == 3: | ||||||
|  |         return image | ||||||
|  |     else: | ||||||
|  |         image = image.convert('RGB') | ||||||
|  |         return image | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # ---------------------------------------------------# | ||||||
|  | #   resize the input image | ||||||
|  | # ---------------------------------------------------# | ||||||
|  | def resize_image(image, size, letterbox_image): | ||||||
|  |     iw, ih = image.size | ||||||
|  |     w, h = size | ||||||
|  |     if letterbox_image: | ||||||
|  |         scale = min(w / iw, h / ih) | ||||||
|  |         nw = int(iw * scale) | ||||||
|  |         nh = int(ih * scale) | ||||||
|  | 
 | ||||||
|  |         image = image.resize((nw, nh), Image.BICUBIC) | ||||||
|  |         new_image = Image.new('RGB', size, (128, 128, 128)) | ||||||
|  |         new_image.paste(image, ((w - nw) // 2, (h - nh) // 2)) | ||||||
|  |     else: | ||||||
|  |         new_image = image.resize((w, h), Image.BICUBIC)  # plain resize here, with no gray-bar letterboxing | ||||||
|  |     return new_image | ||||||
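Worked example of the letterbox branch, with made-up numbers: a 500x375 image resized into a 416x416 canvas gives scale = min(416/500, 416/375) = 0.832, so nw, nh = 416, 312 and gray bars of (416 - 312) // 2 = 52 px are pasted above and below the image.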
|  | 
 | ||||||
|  | 
 | ||||||
|  | # ---------------------------------------------------# | ||||||
|  | #   load the class names | ||||||
|  | # ---------------------------------------------------# | ||||||
|  | def get_classes(classes_path): | ||||||
|  |     with open(classes_path, encoding='utf-8') as f: | ||||||
|  |         class_names = f.readlines() | ||||||
|  |     class_names = [c.strip() for c in class_names] | ||||||
|  |     return class_names, len(class_names) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # ---------------------------------------------------# | ||||||
|  | #   load the anchors | ||||||
|  | # ---------------------------------------------------# | ||||||
|  | def get_anchors(anchors_path): | ||||||
|  |     '''loads the anchors from a file''' | ||||||
|  |     with open(anchors_path, encoding='utf-8') as f: | ||||||
|  |         anchors = f.readline() | ||||||
|  |     anchors = [float(x) for x in anchors.split(',')] | ||||||
|  |     anchors = np.array(anchors).reshape(-1, 2) | ||||||
|  |     return anchors, len(anchors) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # ---------------------------------------------------# | ||||||
|  | #   get the current learning rate | ||||||
|  | # ---------------------------------------------------# | ||||||
|  | def get_lr(optimizer): | ||||||
|  |     for param_group in optimizer.param_groups: | ||||||
|  |         return param_group['lr'] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def preprocess_input(image): | ||||||
|  |     image /= 255.0 | ||||||
|  |     return image | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def show_config(**kwargs): | ||||||
|  |     print('Configurations:') | ||||||
|  |     print('-' * 70) | ||||||
|  |     print('|%25s | %40s|' % ('keys', 'values')) | ||||||
|  |     print('-' * 70) | ||||||
|  |     for key, value in kwargs.items(): | ||||||
|  |         print('|%25s | %40s|' % (str(key), str(value))) | ||||||
|  |     print('-' * 70) | ||||||
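A minimal usage sketch of these helpers together; the file paths are placeholders (a classes file holds one class name per line, an anchors file holds one comma-separated line of width,height values):

    class_names, num_classes = get_classes('model_data/voc_classes.txt')  # hypothetical path
    anchors, num_anchors = get_anchors('model_data/yolo_anchors.txt')     # e.g. "10,13, 16,30, ..." -> (9, 2) array
    show_config(num_classes=num_classes, num_anchors=num_anchors)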
							
								
								
									
232 utils/utils_bbox.py Normal file
							| @ -0,0 +1,232 @@ | |||||||
|  | import torch | ||||||
|  | import torch.nn as nn | ||||||
|  | from torchvision.ops import nms | ||||||
|  | import numpy as np | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class DecodeBox(): | ||||||
|  |     def __init__(self, anchors, num_classes, input_shape, anchors_mask=[[6, 7, 8], [3, 4, 5], [0, 1, 2]]): | ||||||
|  |         super(DecodeBox, self).__init__() | ||||||
|  |         self.anchors = anchors | ||||||
|  |         self.num_classes = num_classes | ||||||
|  |         self.bbox_attrs = 5 + num_classes | ||||||
|  |         self.input_shape = input_shape | ||||||
|  |         # -----------------------------------------------------------# | ||||||
|  |         #   the 13x13 feature map uses anchors [116,90], [156,198], [373,326] | ||||||
|  |         #   the 26x26 feature map uses anchors [30,61], [62,45], [59,119] | ||||||
|  |         #   the 52x52 feature map uses anchors [10,13], [16,30], [33,23] | ||||||
|  |         # -----------------------------------------------------------# | ||||||
|  |         self.anchors_mask = anchors_mask | ||||||
|  | 
 | ||||||
|  |     def decode_box(self, inputs): | ||||||
|  |         outputs = [] | ||||||
|  |         for i, input in enumerate(inputs): | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             #   there are three inputs; their shapes are | ||||||
|  |             #   batch_size, 255, 13, 13 | ||||||
|  |             #   batch_size, 255, 26, 26 | ||||||
|  |             #   batch_size, 255, 52, 52 | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             batch_size = input.size(0) | ||||||
|  |             input_height = input.size(2) | ||||||
|  |             input_width = input.size(3) | ||||||
|  | 
 | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             #   for a 416x416 input, | ||||||
|  |             #   stride_h = stride_w = 32, 16 or 8 | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             stride_h = self.input_shape[0] / input_height | ||||||
|  |             stride_w = self.input_shape[1] / input_width | ||||||
|  |             # -------------------------------------------------# | ||||||
|  |             #   scaled_anchors are expressed in feature-map units | ||||||
|  |             # -------------------------------------------------# | ||||||
|  |             scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) for anchor_width, anchor_height in | ||||||
|  |                               self.anchors[self.anchors_mask[i]]] | ||||||
|  | 
 | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             #   after the view/permute below the three inputs have shapes | ||||||
|  |             #   batch_size, 3, 13, 13, 85 | ||||||
|  |             #   batch_size, 3, 26, 26, 85 | ||||||
|  |             #   batch_size, 3, 52, 52, 85 | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             prediction = input.view(batch_size, len(self.anchors_mask[i]), | ||||||
|  |                                     self.bbox_attrs, input_height, input_width).permute(0, 1, 3, 4, 2).contiguous() | ||||||
|  |             #  e.g. reshaped to 1, 3, 13, 13, 25 for a 20-class model | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             #   offsets for the anchor centers | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             x = torch.sigmoid(prediction[..., 0]) | ||||||
|  |             y = torch.sigmoid(prediction[..., 1]) | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             #   width/height adjustments for the anchors | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             w = prediction[..., 2] | ||||||
|  |             h = prediction[..., 3] | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             #   objectness confidence: is there an object? | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             conf = torch.sigmoid(prediction[..., 4]) | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             #   per-class confidences | ||||||
|  |             # -----------------------------------------------# | ||||||
|  |             pred_cls = torch.sigmoid(prediction[..., 5:]) | ||||||
|  | 
 | ||||||
|  |             FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor | ||||||
|  |             LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor | ||||||
|  | 
 | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             #   build the grid; anchor centers start at the cell top-left corners | ||||||
|  |             #   batch_size, 3, 13, 13 | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_height, 1).repeat( | ||||||
|  |                 batch_size * len(self.anchors_mask[i]), 1, 1).view(x.shape).type(FloatTensor) | ||||||
|  |             grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_width, 1).t().repeat( | ||||||
|  |                 batch_size * len(self.anchors_mask[i]), 1, 1).view(y.shape).type(FloatTensor) | ||||||
|  | 
 | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             #   broadcast the anchor widths/heights over the grid | ||||||
|  |             #   batch_size, 3, 13, 13 | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0])) | ||||||
|  |             anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1])) | ||||||
|  |             anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape) | ||||||
|  |             anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape) | ||||||
|  | 
 | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             #   adjust the anchors with the predictions: | ||||||
|  |             #   first shift the centers towards the bottom-right (the sigmoid offset is added to the grid-cell top-left corner), | ||||||
|  |             #   then rescale the anchor widths and heights | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             pred_boxes = FloatTensor(prediction[..., :4].shape) | ||||||
|  |             pred_boxes[..., 0] = x.data + grid_x | ||||||
|  |             pred_boxes[..., 1] = y.data + grid_y | ||||||
|  |             pred_boxes[..., 2] = torch.exp(w.data) * anchor_w | ||||||
|  |             pred_boxes[..., 3] = torch.exp(h.data) * anchor_h | ||||||
|  | 
 | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             #   normalize the outputs to fractions of the feature-map size | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             _scale = torch.Tensor([input_width, input_height, input_width, input_height]).type(FloatTensor) | ||||||
|  |             output = torch.cat((pred_boxes.view(batch_size, -1, 4) / _scale, | ||||||
|  |                                 conf.view(batch_size, -1, 1), pred_cls.view(batch_size, -1, self.num_classes)), -1) | ||||||
|  |             # output shape: batch_size, num_anchors, 5 + num_classes (25 here) | ||||||
|  |             outputs.append(output.data) | ||||||
|  |         return outputs | ||||||
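A worked example of the decoding above, with made-up numbers: on the 13x13 head, a cell at grid position (6, 6) predicting sigmoid offsets x = y = 0.5 and raw w = h = 0 against the scaled anchor (116/32, 90/32) = (3.625, 2.8125) gives a box center of (6.5, 6.5) and size (3.625, 2.8125) in feature-map units, i.e. roughly (0.5, 0.5) and (0.279, 0.216) after dividing by _scale.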
|  | 
 | ||||||
|  |     def yolo_correct_boxes(self, box_xy, box_wh, input_shape, image_shape, letterbox_image): | ||||||
|  |         # -----------------------------------------------------------------# | ||||||
|  |         #   y is put first so the boxes multiply conveniently with the image height/width | ||||||
|  |         # -----------------------------------------------------------------# | ||||||
|  |         box_yx = box_xy[..., ::-1] | ||||||
|  |         box_hw = box_wh[..., ::-1] | ||||||
|  |         input_shape = np.array(input_shape) | ||||||
|  |         image_shape = np.array(image_shape) | ||||||
|  | 
 | ||||||
|  |         if letterbox_image: | ||||||
|  |             # -----------------------------------------------------------------# | ||||||
|  |             #   offset is the shift of the valid image region from the top-left corner, | ||||||
|  |             #   new_shape is the letterboxed (scaled) image size | ||||||
|  |             # -----------------------------------------------------------------# | ||||||
|  |             new_shape = np.round(image_shape * np.min(input_shape / image_shape)) | ||||||
|  |             offset = (input_shape - new_shape) / 2. / input_shape | ||||||
|  |             scale = input_shape / new_shape | ||||||
|  | 
 | ||||||
|  |             box_yx = (box_yx - offset) * scale | ||||||
|  |             box_hw *= scale | ||||||
|  | 
 | ||||||
|  |         box_mins = box_yx - (box_hw / 2.) | ||||||
|  |         box_maxes = box_yx + (box_hw / 2.) | ||||||
|  |         boxes = np.concatenate([box_mins[..., 0:1], box_mins[..., 1:2], box_maxes[..., 0:1], box_maxes[..., 1:2]], | ||||||
|  |                                axis=-1) | ||||||
|  |         boxes *= np.concatenate([image_shape, image_shape], axis=-1) | ||||||
|  |         return boxes | ||||||
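Worked example with made-up numbers: for a (416, 416) input and a (375, 500) image with letterboxing, new_shape = round([375, 500] * 0.832) = [312, 416], offset = [52/416, 0] = [0.125, 0] and scale = [416/312, 416/416] = [1.333, 1.0], so box_yx is shifted by 0.125 and stretched by 1.333 along y before being multiplied back up to pixel coordinates.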
|  | 
 | ||||||
|  |     def non_max_suppression(self, prediction, num_classes, input_shape, image_shape, letterbox_image, conf_thres=0.5, | ||||||
|  |                             nms_thres=0.4): | ||||||
|  |         # ----------------------------------------------------------# | ||||||
|  |         #   convert the predictions to top-left / bottom-right corner format | ||||||
|  |         #   prediction  [batch_size, num_anchors, 85] | ||||||
|  |         # ----------------------------------------------------------# | ||||||
|  |         box_corner = prediction.new(prediction.shape) | ||||||
|  |         box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 | ||||||
|  |         box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 | ||||||
|  |         box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 | ||||||
|  |         box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 | ||||||
|  |         prediction[:, :, :4] = box_corner[:, :, :4] | ||||||
|  | 
 | ||||||
|  |         output = [None for _ in range(len(prediction))] | ||||||
|  |         for i, image_pred in enumerate(prediction): | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             #   take the max over the class predictions     # image_pred iterates over dim 0 of prediction | ||||||
|  |             #   class_conf  [num_anchors, 1]    class confidence | ||||||
|  |             #   class_pred  [num_anchors, 1]    class index;  image_pred[:, 5:5 + num_classes]  selects the class scores | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True) | ||||||
|  | 
 | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             #   first-round filtering by confidence | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze() | ||||||
|  | 
 | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             #   apply the confidence mask to the predictions | ||||||
|  |             # ----------------------------------------------------------# | ||||||
|  |             image_pred = image_pred[conf_mask] | ||||||
|  |             class_conf = class_conf[conf_mask] | ||||||
|  |             class_pred = class_pred[conf_mask] | ||||||
|  |             if not image_pred.size(0): | ||||||
|  |                 continue  # nothing left for this image; move on to the next one | ||||||
|  |             # -------------------------------------------------------------------------# | ||||||
|  |             #   detections  [num_anchors, 7] | ||||||
|  |             #   the 7 values are: x1, y1, x2, y2, obj_conf, class_conf, class_pred | ||||||
|  |             # -------------------------------------------------------------------------# | ||||||
|  |             detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1) | ||||||
|  | 
 | ||||||
|  |             # ------------------------------------------# | ||||||
|  |             #   all classes present in the detections | ||||||
|  |             # ------------------------------------------# | ||||||
|  |             unique_labels = detections[:, -1].cpu().unique() | ||||||
|  | 
 | ||||||
|  |             if prediction.is_cuda: | ||||||
|  |                 unique_labels = unique_labels.cuda() | ||||||
|  |                 detections = detections.cuda() | ||||||
|  | 
 | ||||||
|  |             for c in unique_labels: | ||||||
|  |                 # ------------------------------------------# | ||||||
|  |                 #   all score-filtered detections of this class | ||||||
|  |                 # ------------------------------------------# | ||||||
|  |                 detections_class = detections[detections[:, -1] == c] | ||||||
|  | 
 | ||||||
|  |                 # ------------------------------------------# | ||||||
|  |                 #   torchvision's built-in non-max suppression is faster! | ||||||
|  |                 # ------------------------------------------# | ||||||
|  |                 keep = nms( | ||||||
|  |                     detections_class[:, :4], | ||||||
|  |                     detections_class[:, 4] * detections_class[:, 5], | ||||||
|  |                     nms_thres | ||||||
|  |                 ) | ||||||
|  |                 max_detections = detections_class[keep] | ||||||
|  | 
 | ||||||
|  |                 # # sort by objectness * class confidence | ||||||
|  |                 # _, conf_sort_index = torch.sort(detections_class[:, 4]*detections_class[:, 5], descending=True) | ||||||
|  |                 # detections_class = detections_class[conf_sort_index] | ||||||
|  |                 # # manual non-max suppression | ||||||
|  |                 # max_detections = [] | ||||||
|  |                 # while detections_class.size(0): | ||||||
|  |                 #     # take the highest-scoring box of this class, then walk down the list and drop any box whose overlap exceeds nms_thres | ||||||
|  |                 #     max_detections.append(detections_class[0].unsqueeze(0)) | ||||||
|  |                 #     if len(detections_class) == 1: | ||||||
|  |                 #         break | ||||||
|  |                 #     ious = bbox_iou(max_detections[-1], detections_class[1:]) | ||||||
|  |                 #     detections_class = detections_class[1:][ious < nms_thres] | ||||||
|  |                 # # stack the kept boxes | ||||||
|  |                 # max_detections = torch.cat(max_detections).data | ||||||
|  | 
 | ||||||
|  |                 # Add max detections to outputs | ||||||
|  |                 output[i] = max_detections if output[i] is None else torch.cat((output[i], max_detections)) | ||||||
|  | 
 | ||||||
|  |             if output[i] is not None: | ||||||
|  |                 output[i] = output[i].cpu().numpy() | ||||||
|  |                 box_xy, box_wh = (output[i][:, 0:2] + output[i][:, 2:4]) / 2, output[i][:, 2:4] - output[i][:, 0:2] | ||||||
|  |                 output[i][:, :4] = self.yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image) | ||||||
|  |         return output | ||||||
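A minimal inference-time sketch for this decoder; the anchor values and shapes are illustrative, and net / images stand in for a trained YOLO model and a preprocessed batch that are not defined in this file:

    import numpy as np
    import torch

    anchors = np.array([[10, 13], [16, 30], [33, 23], [30, 61], [62, 45],
                        [59, 119], [116, 90], [156, 198], [373, 326]])
    decoder = DecodeBox(anchors, num_classes=20, input_shape=(416, 416))

    with torch.no_grad():
        outputs = decoder.decode_box(net(images))  # three heads -> normalized predictions
        results = decoder.non_max_suppression(torch.cat(outputs, 1), 20, (416, 416),
                                              image_shape=(375, 500), letterbox_image=True,
                                              conf_thres=0.5, nms_thres=0.3)
    # results[0] is None or an (n, 7) array: x1, y1, x2, y2, obj_conf, class_conf, class_pred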
							
								
								
									
151 utils/utils_fit.py Normal file
							| @ -0,0 +1,151 @@ | |||||||
|  | import os | ||||||
|  | 
 | ||||||
|  | import torch | ||||||
|  | from tqdm import tqdm | ||||||
|  | 
 | ||||||
|  | from utils.utils import get_lr | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def fit_one_epoch(model_train, model, yolo_loss, loss_history, eval_callback, optimizer, epoch, epoch_step, | ||||||
|  |                   epoch_step_val, gen, gen_val, Epoch, cuda, fp16, scaler, save_period, save_dir, local_rank=0): | ||||||
|  |     loss = 0 | ||||||
|  |     val_loss = 0 | ||||||
|  | 
 | ||||||
|  |     if local_rank == 0: | ||||||
|  |         print('Start Train') | ||||||
|  |         pbar = tqdm(total=epoch_step, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) | ||||||
|  |     model_train.train()  # switch every module to train mode | ||||||
|  |     for iteration, batch in enumerate(gen): | ||||||
|  |         if iteration >= epoch_step:  # caps the number of batches per epoch (what is this for?) | ||||||
|  |             break | ||||||
|  | 
 | ||||||
|  |         images, targets = batch[0], batch[1]   # targets are normalized as well | ||||||
|  |         with torch.no_grad(): | ||||||
|  |             if cuda: | ||||||
|  |                 images = images.cuda(local_rank) | ||||||
|  |                 targets = [ann.cuda(local_rank) for ann in | ||||||
|  |                            targets]  # targets is a Python list of tensors; move each tensor to the GPU, targets stays a Python list | ||||||
|  |         # ----------------------# | ||||||
|  |         #   zero the gradients | ||||||
|  |         # ----------------------# | ||||||
|  |         optimizer.zero_grad() | ||||||
|  |         if not fp16: | ||||||
|  |             # ----------------------# | ||||||
|  |             #   forward pass | ||||||
|  |             # ----------------------# | ||||||
|  |             outputs = model_train(images) | ||||||
|  | 
 | ||||||
|  |             loss_value_all = 0 | ||||||
|  |             # ----------------------# | ||||||
|  |             #   compute the loss | ||||||
|  |             # ----------------------# | ||||||
|  |             for l in range(len(outputs)):  # one loss term per output resolution (three heads) | ||||||
|  |                 loss_item = yolo_loss(l, outputs[l], targets) | ||||||
|  |                 loss_value_all += loss_item | ||||||
|  |             loss_value = loss_value_all | ||||||
|  | 
 | ||||||
|  |             # ----------------------# | ||||||
|  |             #   backward pass | ||||||
|  |             # ----------------------# | ||||||
|  |             loss_value.backward() | ||||||
|  |             optimizer.step() | ||||||
|  |         else:  # fp16 branch (not taken in the default configuration) | ||||||
|  |             from torch.cuda.amp import autocast | ||||||
|  |             with autocast(): | ||||||
|  |                 # ----------------------# | ||||||
|  |                 #   forward pass | ||||||
|  |                 # ----------------------# | ||||||
|  |                 outputs = model_train(images) | ||||||
|  | 
 | ||||||
|  |                 loss_value_all = 0 | ||||||
|  |                 # ----------------------# | ||||||
|  |                 #   compute the loss | ||||||
|  |                 # ----------------------# | ||||||
|  |                 for l in range(len(outputs)): | ||||||
|  |                     loss_item = yolo_loss(l, outputs[l], targets) | ||||||
|  |                     loss_value_all += loss_item | ||||||
|  |                 loss_value = loss_value_all | ||||||
|  | 
 | ||||||
|  |             # ----------------------# | ||||||
|  |             #   backward pass (loss scaled for mixed precision) | ||||||
|  |             # ----------------------# | ||||||
|  |             scaler.scale(loss_value).backward() | ||||||
|  |             scaler.step(optimizer) | ||||||
|  |             scaler.update() | ||||||
|  | 
 | ||||||
|  |         loss += loss_value.item() | ||||||
|  | 
 | ||||||
|  |         # # debug only: begin | ||||||
|  |         # if iteration > 2: | ||||||
|  |         #     break | ||||||
|  |         # # debug only: end | ||||||
|  | 
 | ||||||
|  |         if local_rank == 0: | ||||||
|  |             pbar.set_postfix(**{'loss': loss / (iteration + 1), | ||||||
|  |                                 'lr': get_lr(optimizer)}) | ||||||
|  |             pbar.update(1) | ||||||
|  | 
 | ||||||
|  |     if local_rank == 0: | ||||||
|  |         pbar.close() | ||||||
|  |         print('Finish Train') | ||||||
|  |         print('Start Validation') | ||||||
|  |         pbar = tqdm(total=epoch_step_val, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) | ||||||
|  | 
 | ||||||
|  |     model_train.eval() | ||||||
|  |     for iteration, batch in enumerate(gen_val): | ||||||
|  |         if iteration >= epoch_step_val: | ||||||
|  |             break | ||||||
|  |         images, targets = batch[0], batch[1] | ||||||
|  |         with torch.no_grad(): | ||||||
|  |             if cuda: | ||||||
|  |                 images = images.cuda(local_rank) | ||||||
|  |                 targets = [ann.cuda(local_rank) for ann in targets] | ||||||
|  |             # ----------------------# | ||||||
|  |             #   zero the gradients | ||||||
|  |             # ----------------------# | ||||||
|  |             optimizer.zero_grad() | ||||||
|  |             # ----------------------# | ||||||
|  |             #   forward pass | ||||||
|  |             # ----------------------# | ||||||
|  |             outputs = model_train(images) | ||||||
|  | 
 | ||||||
|  |             loss_value_all = 0 | ||||||
|  |             # ----------------------# | ||||||
|  |             #   compute the loss | ||||||
|  |             # ----------------------# | ||||||
|  |             for l in range(len(outputs)): | ||||||
|  |                 loss_item = yolo_loss(l, outputs[l], targets) | ||||||
|  |                 loss_value_all += loss_item | ||||||
|  |             loss_value = loss_value_all | ||||||
|  | 
 | ||||||
|  |         val_loss += loss_value.item() | ||||||
|  | 
 | ||||||
|  |         # # debug only: begin | ||||||
|  |         # if iteration > 2: | ||||||
|  |         #     break | ||||||
|  |         # # debug only: end | ||||||
|  | 
 | ||||||
|  |         if local_rank == 0: | ||||||
|  |             pbar.set_postfix(**{'val_loss': val_loss / (iteration + 1)}) | ||||||
|  |             pbar.update(1) | ||||||
|  | 
 | ||||||
|  |     if local_rank == 0: | ||||||
|  |         pbar.close() | ||||||
|  |         print('Finish Validation') | ||||||
|  |         loss_history.append_loss(epoch + 1, loss / epoch_step, val_loss / epoch_step_val) | ||||||
|  |         eval_callback.on_epoch_end(epoch + 1, model_train) | ||||||
|  |         print('Epoch:' + str(epoch + 1) + '/' + str(Epoch)) | ||||||
|  |         print('Total Loss: %.3f || Val Loss: %.3f ' % (loss / epoch_step, val_loss / epoch_step_val)) | ||||||
|  | 
 | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         #   save the weights | ||||||
|  |         # -----------------------------------------------# | ||||||
|  |         if (epoch + 1) % save_period == 0 or epoch + 1 == Epoch: | ||||||
|  |             torch.save(model.state_dict(), os.path.join(save_dir, "ep%03d-loss%.3f-val_loss%.3f.pth" % ( | ||||||
|  |                 epoch + 1, loss / epoch_step, val_loss / epoch_step_val))) | ||||||
|  | 
 | ||||||
|  |         if len(loss_history.val_loss) <= 1 or (val_loss / epoch_step_val) <= min(loss_history.val_loss): | ||||||
|  |             print('Save best model to best_epoch_weights.pth') | ||||||
|  |             torch.save(model.state_dict(), os.path.join(save_dir, "best_epoch_weights.pth")) | ||||||
|  | 
 | ||||||
|  |         torch.save(model.state_dict(), os.path.join(save_dir, "last_epoch_weights.pth")) | ||||||
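A compressed sketch of the loop this function expects; every object here (model, yolo_loss, loss_history, eval_callback, the two DataLoaders, Init_Epoch) has to be constructed elsewhere, so this only shows the calling shape, not this commit's actual train script:

    for epoch in range(Init_Epoch, Epoch):
        fit_one_epoch(model_train, model, yolo_loss, loss_history, eval_callback,
                      optimizer, epoch, epoch_step, epoch_step_val, gen, gen_val,
                      Epoch, cuda=True, fp16=False, scaler=None,
                      save_period=10, save_dir='logs')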
							
								
								
									
963 utils/utils_map.py Normal file
							| @ -0,0 +1,963 @@ | |||||||
|  | import glob | ||||||
|  | import json | ||||||
|  | import math | ||||||
|  | import operator | ||||||
|  | import os | ||||||
|  | import shutil | ||||||
|  | import sys | ||||||
|  | 
 | ||||||
|  | try: | ||||||
|  |     from pycocotools.coco import COCO | ||||||
|  |     from pycocotools.cocoeval import COCOeval | ||||||
|  | except: | ||||||
|  |     pass | ||||||
|  | import cv2 | ||||||
|  | import matplotlib | ||||||
|  | 
 | ||||||
|  | matplotlib.use('Agg') | ||||||
|  | from matplotlib import pyplot as plt | ||||||
|  | import numpy as np | ||||||
|  | 
 | ||||||
|  | ''' | ||||||
|  |     0,0 ------> x (width) | ||||||
|  |      | | ||||||
|  |      |  (Left,Top) | ||||||
|  |      |      *_________ | ||||||
|  |      |      |         | | ||||||
|  |             |         | | ||||||
|  |      y      |_________| | ||||||
|  |   (height)            * | ||||||
|  |                 (Right,Bottom) | ||||||
|  | ''' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def log_average_miss_rate(precision, fp_cumsum, num_images): | ||||||
|  |     """ | ||||||
|  |         log-average miss rate: | ||||||
|  |             Calculated by averaging miss rates at 9 evenly spaced FPPI points | ||||||
|  |             between 1e-2 and 1e0, in log-space. | ||||||
|  | 
 | ||||||
|  |         output: | ||||||
|  |                 lamr | log-average miss rate | ||||||
|  |                 mr | miss rate | ||||||
|  |                 fppi | false positives per image | ||||||
|  | 
 | ||||||
|  |         references: | ||||||
|  |             [1] Dollar, Piotr, et al. "Pedestrian Detection: An Evaluation of the | ||||||
|  |                State of the Art." Pattern Analysis and Machine Intelligence, IEEE | ||||||
|  |                Transactions on 34.4 (2012): 743 - 761. | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     if precision.size == 0: | ||||||
|  |         lamr = 0 | ||||||
|  |         mr = 1 | ||||||
|  |         fppi = 0 | ||||||
|  |         return lamr, mr, fppi | ||||||
|  | 
 | ||||||
|  |     fppi = fp_cumsum / float(num_images) | ||||||
|  |     mr = (1 - precision) | ||||||
|  | 
 | ||||||
|  |     fppi_tmp = np.insert(fppi, 0, -1.0) | ||||||
|  |     mr_tmp = np.insert(mr, 0, 1.0) | ||||||
|  | 
 | ||||||
|  |     ref = np.logspace(-2.0, 0.0, num=9) | ||||||
|  |     for i, ref_i in enumerate(ref): | ||||||
|  |         j = np.where(fppi_tmp <= ref_i)[-1][-1] | ||||||
|  |         ref[i] = mr_tmp[j] | ||||||
|  | 
 | ||||||
|  |     lamr = math.exp(np.mean(np.log(np.maximum(1e-10, ref)))) | ||||||
|  | 
 | ||||||
|  |     return lamr, mr, fppi | ||||||
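For example, if the interpolated miss rate happens to be 0.5 at every one of the nine reference FPPI points, the geometric mean gives lamr = exp(mean(log 0.5)) = 0.5.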
|  | 
 | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  |  throw error and exit | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def error(msg): | ||||||
|  |     print(msg) | ||||||
|  |     sys.exit(0) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  |  check if the number is a float between 0.0 and 1.0 | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def is_float_between_0_and_1(value): | ||||||
|  |     try: | ||||||
|  |         val = float(value) | ||||||
|  |         if val > 0.0 and val < 1.0: | ||||||
|  |             return True | ||||||
|  |         else: | ||||||
|  |             return False | ||||||
|  |     except ValueError: | ||||||
|  |         return False | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  |  Calculate the AP given the recall and precision array | ||||||
|  |     1st) We compute a version of the measured precision/recall curve with | ||||||
|  |          precision monotonically decreasing | ||||||
|  |     2nd) We compute the AP as the area under this curve by numerical integration. | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def voc_ap(rec, prec): | ||||||
|  |     """ | ||||||
|  |     --- Official matlab code VOC2012--- | ||||||
|  |     mrec=[0 ; rec ; 1]; | ||||||
|  |     mpre=[0 ; prec ; 0]; | ||||||
|  |     for i=numel(mpre)-1:-1:1 | ||||||
|  |             mpre(i)=max(mpre(i),mpre(i+1)); | ||||||
|  |     end | ||||||
|  |     i=find(mrec(2:end)~=mrec(1:end-1))+1; | ||||||
|  |     ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); | ||||||
|  |     """ | ||||||
|  |     rec.insert(0, 0.0)  # insert 0.0 at beginning of list | ||||||
|  |     rec.append(1.0)  # insert 1.0 at end of list | ||||||
|  |     mrec = rec[:] | ||||||
|  |     prec.insert(0, 0.0)  # insert 0.0 at beginning of list | ||||||
|  |     prec.append(0.0)  # insert 0.0 at end of list | ||||||
|  |     mpre = prec[:] | ||||||
|  |     """ | ||||||
|  |      This part makes the precision monotonically decreasing | ||||||
|  |         (goes from the end to the beginning) | ||||||
|  |         matlab: for i=numel(mpre)-1:-1:1 | ||||||
|  |                     mpre(i)=max(mpre(i),mpre(i+1)); | ||||||
|  |     """ | ||||||
|  |     for i in range(len(mpre) - 2, -1, -1): | ||||||
|  |         mpre[i] = max(mpre[i], mpre[i + 1]) | ||||||
|  |     """ | ||||||
|  |      This part creates a list of indexes where the recall changes | ||||||
|  |         matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1; | ||||||
|  |     """ | ||||||
|  |     i_list = [] | ||||||
|  |     for i in range(1, len(mrec)): | ||||||
|  |         if mrec[i] != mrec[i - 1]: | ||||||
|  |             i_list.append(i)  # if it was matlab would be i + 1 | ||||||
|  |     """ | ||||||
|  |      The Average Precision (AP) is the area under the curve | ||||||
|  |         (numerical integration) | ||||||
|  |         matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); | ||||||
|  |     """ | ||||||
|  |     ap = 0.0 | ||||||
|  |     for i in i_list: | ||||||
|  |         ap += ((mrec[i] - mrec[i - 1]) * mpre[i]) | ||||||
|  |     return ap, mrec, mpre | ||||||
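Worked example with made-up numbers: rec = [0.5, 1.0] and prec = [1.0, 0.5] pad out to mrec = [0.0, 0.5, 1.0, 1.0] and, after the monotonic pass, mpre = [1.0, 1.0, 0.5, 0.0]; the recall changes at i = 1 and i = 2, so ap = 0.5 * 1.0 + 0.5 * 0.5 = 0.75.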
|  | 
 | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  |  Convert the lines of a file to a list | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def file_lines_to_list(path): | ||||||
|  |     # open txt file lines to a list | ||||||
|  |     with open(path) as f: | ||||||
|  |         content = f.readlines() | ||||||
|  |     # remove whitespace characters like `\n` at the end of each line | ||||||
|  |     content = [x.strip() for x in content] | ||||||
|  |     return content | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  |  Draws text in image | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def draw_text_in_image(img, text, pos, color, line_width): | ||||||
|  |     font = cv2.FONT_HERSHEY_PLAIN | ||||||
|  |     fontScale = 1 | ||||||
|  |     lineType = 1 | ||||||
|  |     bottomLeftCornerOfText = pos | ||||||
|  |     cv2.putText(img, text, | ||||||
|  |                 bottomLeftCornerOfText, | ||||||
|  |                 font, | ||||||
|  |                 fontScale, | ||||||
|  |                 color, | ||||||
|  |                 lineType) | ||||||
|  |     text_width, _ = cv2.getTextSize(text, font, fontScale, lineType)[0] | ||||||
|  |     return img, (line_width + text_width) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  |  Plot - adjust axes | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def adjust_axes(r, t, fig, axes): | ||||||
|  |     # get text width for re-scaling | ||||||
|  |     bb = t.get_window_extent(renderer=r) | ||||||
|  |     text_width_inches = bb.width / fig.dpi | ||||||
|  |     # get axis width in inches | ||||||
|  |     current_fig_width = fig.get_figwidth() | ||||||
|  |     new_fig_width = current_fig_width + text_width_inches | ||||||
|  |     proportion = new_fig_width / current_fig_width | ||||||
|  |     # get axis limit | ||||||
|  |     x_lim = axes.get_xlim() | ||||||
|  |     axes.set_xlim([x_lim[0], x_lim[1] * proportion]) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | """ | ||||||
|  |  Draw plot using Matplotlib | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label, output_path, to_show, plot_color, | ||||||
|  |                    true_p_bar): | ||||||
|  |     # sort the dictionary by decreasing value, into a list of tuples | ||||||
|  |     sorted_dic_by_value = sorted(dictionary.items(), key=operator.itemgetter(1)) | ||||||
|  |     # unpacking the list of tuples into two lists | ||||||
|  |     sorted_keys, sorted_values = zip(*sorted_dic_by_value) | ||||||
|  |     #  | ||||||
|  |     if true_p_bar != "": | ||||||
|  |         """ | ||||||
|  |          Special case to draw in: | ||||||
|  |             - green -> TP: True Positives (object detected and matches ground-truth) | ||||||
|  |             - red -> FP: False Positives (object detected but does not match ground-truth) | ||||||
|  |             - orange -> FN: False Negatives (object not detected but present in the ground-truth) | ||||||
|  |         """ | ||||||
|  |         fp_sorted = [] | ||||||
|  |         tp_sorted = [] | ||||||
|  |         for key in sorted_keys: | ||||||
|  |             fp_sorted.append(dictionary[key] - true_p_bar[key]) | ||||||
|  |             tp_sorted.append(true_p_bar[key]) | ||||||
|  |         plt.barh(range(n_classes), fp_sorted, align='center', color='crimson', label='False Positive') | ||||||
|  |         plt.barh(range(n_classes), tp_sorted, align='center', color='forestgreen', label='True Positive', | ||||||
|  |                  left=fp_sorted) | ||||||
|  |         # add legend | ||||||
|  |         plt.legend(loc='lower right') | ||||||
|  |         """ | ||||||
|  |          Write number on side of bar | ||||||
|  |         """ | ||||||
|  |         fig = plt.gcf()  # gcf - get current figure | ||||||
|  |         axes = plt.gca() | ||||||
|  |         r = fig.canvas.get_renderer() | ||||||
|  |         for i, val in enumerate(sorted_values): | ||||||
|  |             fp_val = fp_sorted[i] | ||||||
|  |             tp_val = tp_sorted[i] | ||||||
|  |             fp_str_val = " " + str(fp_val) | ||||||
|  |             tp_str_val = fp_str_val + " " + str(tp_val) | ||||||
|  |             # trick to paint multicolor with offset: | ||||||
|  |             # first paint everything and then repaint the first number | ||||||
|  |             t = plt.text(val, i, tp_str_val, color='forestgreen', va='center', fontweight='bold') | ||||||
|  |             plt.text(val, i, fp_str_val, color='crimson', va='center', fontweight='bold') | ||||||
|  |             if i == (len(sorted_values) - 1):  # largest bar | ||||||
|  |                 adjust_axes(r, t, fig, axes) | ||||||
|  |     else: | ||||||
|  |         plt.barh(range(n_classes), sorted_values, color=plot_color) | ||||||
|  |         """ | ||||||
|  |          Write number on side of bar | ||||||
|  |         """ | ||||||
|  |         fig = plt.gcf()  # gcf - get current figure | ||||||
|  |         axes = plt.gca() | ||||||
|  |         r = fig.canvas.get_renderer() | ||||||
|  |         for i, val in enumerate(sorted_values): | ||||||
|  |             str_val = " " + str(val)  # add a space before | ||||||
|  |             if val < 1.0: | ||||||
|  |                 str_val = " {0:.2f}".format(val) | ||||||
|  |             t = plt.text(val, i, str_val, color=plot_color, va='center', fontweight='bold') | ||||||
|  |             # re-set axes to show number inside the figure | ||||||
|  |             if i == (len(sorted_values) - 1):  # largest bar | ||||||
|  |                 adjust_axes(r, t, fig, axes) | ||||||
|  |     # set window title | ||||||
|  |     fig.canvas.set_window_title(window_title) | ||||||
|  |     # write classes in y axis | ||||||
|  |     tick_font_size = 12 | ||||||
|  |     plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size) | ||||||
|  |     """ | ||||||
|  |      Re-scale height accordingly | ||||||
|  |     """ | ||||||
|  |     init_height = fig.get_figheight() | ||||||
|  |     # compute the matrix height in points and inches | ||||||
|  |     dpi = fig.dpi | ||||||
|  |     height_pt = n_classes * (tick_font_size * 1.4)  # 1.4 (some spacing) | ||||||
|  |     height_in = height_pt / dpi | ||||||
|  |     # compute the required figure height  | ||||||
|  |     top_margin = 0.15  # in percentage of the figure height | ||||||
|  |     bottom_margin = 0.05  # in percentage of the figure height | ||||||
|  |     figure_height = height_in / (1 - top_margin - bottom_margin) | ||||||
|  |     # set new height | ||||||
|  |     if figure_height > init_height: | ||||||
|  |         fig.set_figheight(figure_height) | ||||||
|  | 
 | ||||||
|  |     # set plot title | ||||||
|  |     plt.title(plot_title, fontsize=14) | ||||||
|  |     # set axis titles | ||||||
|  |     # plt.xlabel('classes') | ||||||
|  |     plt.xlabel(x_label, fontsize='large') | ||||||
|  |     # adjust size of window | ||||||
|  |     fig.tight_layout() | ||||||
|  |     # save the plot | ||||||
|  |     fig.savefig(output_path) | ||||||
|  |     # show image | ||||||
|  |     if to_show: | ||||||
|  |         plt.show() | ||||||
|  |     # close the plot | ||||||
|  |     plt.close() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_map(MINOVERLAP, draw_plot, score_threhold=0.5, path='./map_out'): | ||||||
|  |     GT_PATH = os.path.join(path, 'ground-truth') | ||||||
|  |     DR_PATH = os.path.join(path, 'detection-results') | ||||||
|  |     IMG_PATH = os.path.join(path, 'images-optional') | ||||||
|  |     TEMP_FILES_PATH = os.path.join(path, '.temp_files') | ||||||
|  |     RESULTS_FILES_PATH = os.path.join(path, 'results') | ||||||
|  | 
 | ||||||
|  |     show_animation = True | ||||||
|  |     if os.path.exists(IMG_PATH): | ||||||
|  |         for dirpath, dirnames, files in os.walk(IMG_PATH): | ||||||
|  |             if not files: | ||||||
|  |                 show_animation = False | ||||||
|  |     else: | ||||||
|  |         show_animation = False | ||||||
|  | 
 | ||||||
|  |     if not os.path.exists(TEMP_FILES_PATH): | ||||||
|  |         os.makedirs(TEMP_FILES_PATH) | ||||||
|  | 
 | ||||||
|  |     if os.path.exists(RESULTS_FILES_PATH): | ||||||
|  |         shutil.rmtree(RESULTS_FILES_PATH) | ||||||
|  |     else: | ||||||
|  |         os.makedirs(RESULTS_FILES_PATH) | ||||||
|  |     if draw_plot: | ||||||
|  |         try: | ||||||
|  |             matplotlib.use('TkAgg') | ||||||
|  |         except: | ||||||
|  |             pass | ||||||
|  |         os.makedirs(os.path.join(RESULTS_FILES_PATH, "AP")) | ||||||
|  |         os.makedirs(os.path.join(RESULTS_FILES_PATH, "F1")) | ||||||
|  |         os.makedirs(os.path.join(RESULTS_FILES_PATH, "Recall")) | ||||||
|  |         os.makedirs(os.path.join(RESULTS_FILES_PATH, "Precision")) | ||||||
|  |     if show_animation: | ||||||
|  |         os.makedirs(os.path.join(RESULTS_FILES_PATH, "images", "detections_one_by_one")) | ||||||
|  | 
 | ||||||
|  |     ground_truth_files_list = glob.glob(GT_PATH + '/*.txt') | ||||||
|  |     if len(ground_truth_files_list) == 0: | ||||||
|  |         error("Error: No ground-truth files found!") | ||||||
|  |     ground_truth_files_list.sort() | ||||||
|  |     gt_counter_per_class = {} | ||||||
|  |     counter_images_per_class = {} | ||||||
|  | 
 | ||||||
|  |     for txt_file in ground_truth_files_list: | ||||||
|  |         file_id = txt_file.split(".txt", 1)[0] | ||||||
|  |         file_id = os.path.basename(os.path.normpath(file_id)) | ||||||
|  |         temp_path = os.path.join(DR_PATH, (file_id + ".txt")) | ||||||
|  |         if not os.path.exists(temp_path): | ||||||
|  |             error_msg = "Error. File not found: {}\n".format(temp_path) | ||||||
|  |             error(error_msg) | ||||||
|  |         lines_list = file_lines_to_list(txt_file) | ||||||
|  |         bounding_boxes = [] | ||||||
|  |         is_difficult = False | ||||||
|  |         already_seen_classes = [] | ||||||
|  |         for line in lines_list: | ||||||
|  |             try: | ||||||
|  |                 if "difficult" in line: | ||||||
|  |                     class_name, left, top, right, bottom, _difficult = line.split() | ||||||
|  |                     is_difficult = True | ||||||
|  |                 else: | ||||||
|  |                     class_name, left, top, right, bottom = line.split() | ||||||
|  |             except: | ||||||
|  |                 if "difficult" in line: | ||||||
|  |                     line_split = line.split() | ||||||
|  |                     _difficult = line_split[-1] | ||||||
|  |                     bottom = line_split[-2] | ||||||
|  |                     right = line_split[-3] | ||||||
|  |                     top = line_split[-4] | ||||||
|  |                     left = line_split[-5] | ||||||
|  |                     class_name = "" | ||||||
|  |                     for name in line_split[:-5]: | ||||||
|  |                         class_name += name + " " | ||||||
|  |                     class_name = class_name[:-1] | ||||||
|  |                     is_difficult = True | ||||||
|  |                 else: | ||||||
|  |                     line_split = line.split() | ||||||
|  |                     bottom = line_split[-1] | ||||||
|  |                     right = line_split[-2] | ||||||
|  |                     top = line_split[-3] | ||||||
|  |                     left = line_split[-4] | ||||||
|  |                     class_name = "" | ||||||
|  |                     for name in line_split[:-4]: | ||||||
|  |                         class_name += name + " " | ||||||
|  |                     class_name = class_name[:-1] | ||||||
|  | 
 | ||||||
|  |             bbox = left + " " + top + " " + right + " " + bottom | ||||||
|  |             if is_difficult: | ||||||
|  |                 bounding_boxes.append({"class_name": class_name, "bbox": bbox, "used": False, "difficult": True}) | ||||||
|  |                 is_difficult = False | ||||||
|  |             else: | ||||||
|  |                 bounding_boxes.append({"class_name": class_name, "bbox": bbox, "used": False}) | ||||||
|  |                 if class_name in gt_counter_per_class: | ||||||
|  |                     gt_counter_per_class[class_name] += 1 | ||||||
|  |                 else: | ||||||
|  |                     gt_counter_per_class[class_name] = 1 | ||||||
|  | 
 | ||||||
|  |                 if class_name not in already_seen_classes: | ||||||
|  |                     if class_name in counter_images_per_class: | ||||||
|  |                         counter_images_per_class[class_name] += 1 | ||||||
|  |                     else: | ||||||
|  |                         counter_images_per_class[class_name] = 1 | ||||||
|  |                     already_seen_classes.append(class_name) | ||||||
|  | 
 | ||||||
|  |         with open(TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json", 'w') as outfile: | ||||||
|  |             json.dump(bounding_boxes, outfile) | ||||||
|  | 
 | ||||||
|  |     gt_classes = list(gt_counter_per_class.keys()) | ||||||
|  |     gt_classes = sorted(gt_classes) | ||||||
|  |     n_classes = len(gt_classes) | ||||||
|  | 
 | ||||||
|  |     dr_files_list = glob.glob(DR_PATH + '/*.txt') | ||||||
|  |     dr_files_list.sort() | ||||||
|  |     for class_index, class_name in enumerate(gt_classes): | ||||||
|  |         bounding_boxes = [] | ||||||
|  |         for txt_file in dr_files_list: | ||||||
|  |             file_id = txt_file.split(".txt", 1)[0] | ||||||
|  |             file_id = os.path.basename(os.path.normpath(file_id)) | ||||||
|  |             temp_path = os.path.join(GT_PATH, (file_id + ".txt")) | ||||||
|  |             if class_index == 0: | ||||||
|  |                 if not os.path.exists(temp_path): | ||||||
|  |                     error_msg = "Error. File not found: {}\n".format(temp_path) | ||||||
|  |                     error(error_msg) | ||||||
|  |             lines = file_lines_to_list(txt_file) | ||||||
|  |             for line in lines: | ||||||
|  |                 try: | ||||||
|  |                     tmp_class_name, confidence, left, top, right, bottom = line.split() | ||||||
|  |                 except: | ||||||
|  |                     line_split = line.split() | ||||||
|  |                     bottom = line_split[-1] | ||||||
|  |                     right = line_split[-2] | ||||||
|  |                     top = line_split[-3] | ||||||
|  |                     left = line_split[-4] | ||||||
|  |                     confidence = line_split[-5] | ||||||
|  |                     tmp_class_name = "" | ||||||
|  |                     for name in line_split[:-5]: | ||||||
|  |                         tmp_class_name += name + " " | ||||||
|  |                     tmp_class_name = tmp_class_name[:-1] | ||||||
|  | 
 | ||||||
|  |                 if tmp_class_name == class_name: | ||||||
|  |                     bbox = left + " " + top + " " + right + " " + bottom | ||||||
|  |                     bounding_boxes.append({"confidence": confidence, "file_id": file_id, "bbox": bbox}) | ||||||
|  | 
 | ||||||
|  |         bounding_boxes.sort(key=lambda x: float(x['confidence']), reverse=True) | ||||||
|  |         with open(TEMP_FILES_PATH + "/" + class_name + "_dr.json", 'w') as outfile: | ||||||
|  |             json.dump(bounding_boxes, outfile) | ||||||
|  | 
 | ||||||
|  |     sum_AP = 0.0 | ||||||
|  |     ap_dictionary = {} | ||||||
|  |     lamr_dictionary = {} | ||||||
|  |     with open(RESULTS_FILES_PATH + "/results.txt", 'w') as results_file: | ||||||
|  |         results_file.write("# AP and precision/recall per class\n") | ||||||
|  |         count_true_positives = {} | ||||||
|  | 
 | ||||||
|  |         for class_index, class_name in enumerate(gt_classes): | ||||||
|  |             count_true_positives[class_name] = 0 | ||||||
|  |             dr_file = TEMP_FILES_PATH + "/" + class_name + "_dr.json" | ||||||
|  |             dr_data = json.load(open(dr_file)) | ||||||
|  | 
 | ||||||
|  |             nd = len(dr_data) | ||||||
|  |             tp = [0] * nd | ||||||
|  |             fp = [0] * nd | ||||||
|  |             score = [0] * nd | ||||||
|  |             score_threhold_idx = 0 | ||||||
|  |             for idx, detection in enumerate(dr_data): | ||||||
|  |                 file_id = detection["file_id"] | ||||||
|  |                 score[idx] = float(detection["confidence"]) | ||||||
|  |                 if score[idx] >= score_threhold: | ||||||
|  |                     score_threhold_idx = idx | ||||||
|  | 
 | ||||||
|  |                 if show_animation: | ||||||
|  |                     ground_truth_img = glob.glob1(IMG_PATH, file_id + ".*") | ||||||
|  |                     if len(ground_truth_img) == 0: | ||||||
|  |                         error("Error. Image not found with id: " + file_id) | ||||||
|  |                     elif len(ground_truth_img) > 1: | ||||||
|  |                     elif len(ground_truth_img) > 1: | ||||||
|  |                         error("Error. Multiple images with id: " + file_id) | ||||||
|  |                     else: | ||||||
|  |                         img = cv2.imread(IMG_PATH + "/" + ground_truth_img[0]) | ||||||
|  |                         img_cumulative_path = RESULTS_FILES_PATH + "/images/" + ground_truth_img[0] | ||||||
|  |                         if os.path.isfile(img_cumulative_path): | ||||||
|  |                             img_cumulative = cv2.imread(img_cumulative_path) | ||||||
|  |                         else: | ||||||
|  |                             img_cumulative = img.copy() | ||||||
|  |                         bottom_border = 60 | ||||||
|  |                         BLACK = [0, 0, 0] | ||||||
|  |                         img = cv2.copyMakeBorder(img, 0, bottom_border, 0, 0, cv2.BORDER_CONSTANT, value=BLACK) | ||||||
|  | 
 | ||||||
|  |                 gt_file = TEMP_FILES_PATH + "/" + file_id + "_ground_truth.json" | ||||||
|  |                 ground_truth_data = json.load(open(gt_file)) | ||||||
|  |                 ovmax = -1 | ||||||
|  |                 gt_match = -1 | ||||||
|  |                 bb = [float(x) for x in detection["bbox"].split()] | ||||||
|  |                 for obj in ground_truth_data: | ||||||
|  |                     if obj["class_name"] == class_name: | ||||||
|  |                         bbgt = [float(x) for x in obj["bbox"].split()] | ||||||
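|  |                         # bi is the intersection box of detection bb and ground truth | ||||||
|  |                         # bbgt; the +1 terms follow the PASCAL VOC convention of | ||||||
|  |                         # inclusive pixel coordinates when computing areas. | ||||||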
|  |                         bi = [max(bb[0], bbgt[0]), max(bb[1], bbgt[1]), min(bb[2], bbgt[2]), min(bb[3], bbgt[3])] | ||||||
|  |                         iw = bi[2] - bi[0] + 1 | ||||||
|  |                         ih = bi[3] - bi[1] + 1 | ||||||
|  |                         if iw > 0 and ih > 0: | ||||||
|  |                             ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) \ | ||||||
|  |                                  + (bbgt[2] - bbgt[0] + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih | ||||||
|  |                             ov = iw * ih / ua | ||||||
|  |                             if ov > ovmax: | ||||||
|  |                                 ovmax = ov | ||||||
|  |                                 gt_match = obj | ||||||
|  | 
 | ||||||
|  |                 if show_animation: | ||||||
|  |                     status = "NO MATCH FOUND!" | ||||||
|  | 
 | ||||||
|  |                 min_overlap = MINOVERLAP | ||||||
|  |                 if ovmax >= min_overlap: | ||||||
|  |                     if "difficult" not in gt_match: | ||||||
|  |                         if not bool(gt_match["used"]): | ||||||
|  |                             tp[idx] = 1 | ||||||
|  |                             gt_match["used"] = True | ||||||
|  |                             count_true_positives[class_name] += 1 | ||||||
|  |                             with open(gt_file, 'w') as f: | ||||||
|  |                                 f.write(json.dumps(ground_truth_data)) | ||||||
|  |                             if show_animation: | ||||||
|  |                                 status = "MATCH!" | ||||||
|  |                         else: | ||||||
|  |                             fp[idx] = 1 | ||||||
|  |                             if show_animation: | ||||||
|  |                                 status = "REPEATED MATCH!" | ||||||
|  |                 else: | ||||||
|  |                     fp[idx] = 1 | ||||||
|  |                     if ovmax > 0: | ||||||
|  |                         status = "INSUFFICIENT OVERLAP" | ||||||
|  | 
 | ||||||
|  |                 """ | ||||||
|  |                 Draw image to show animation | ||||||
|  |                 """ | ||||||
|  |                 if show_animation: | ||||||
|  |                     height, width = img.shape[:2] | ||||||
|  |                     white = (255, 255, 255) | ||||||
|  |                     light_blue = (255, 200, 100) | ||||||
|  |                     green = (0, 255, 0) | ||||||
|  |                     light_red = (30, 30, 255) | ||||||
|  |                     margin = 10 | ||||||
|  |                     # 1st line | ||||||
|  |                     v_pos = int(height - margin - (bottom_border / 2.0)) | ||||||
|  |                     text = "Image: " + ground_truth_img[0] + " " | ||||||
|  |                     img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) | ||||||
|  |                     text = "Class [" + str(class_index) + "/" + str(n_classes) + "]: " + class_name + " " | ||||||
|  |                     img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), light_blue, | ||||||
|  |                                                          line_width) | ||||||
|  |                     if ovmax != -1: | ||||||
|  |                         color = light_red | ||||||
|  |                         if status == "INSUFFICIENT OVERLAP": | ||||||
|  |                             text = "IoU: {0:.2f}% ".format(ovmax * 100) + "< {0:.2f}% ".format(min_overlap * 100) | ||||||
|  |                         else: | ||||||
|  |                             text = "IoU: {0:.2f}% ".format(ovmax * 100) + ">= {0:.2f}% ".format(min_overlap * 100) | ||||||
|  |                             color = green | ||||||
|  |                         img, _ = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) | ||||||
|  |                     # 2nd line | ||||||
|  |                     v_pos += int(bottom_border / 2.0) | ||||||
|  |                     rank_pos = str(idx + 1) | ||||||
|  |                     text = "Detection #rank: " + rank_pos + " confidence: {0:.2f}% ".format( | ||||||
|  |                         float(detection["confidence"]) * 100) | ||||||
|  |                     img, line_width = draw_text_in_image(img, text, (margin, v_pos), white, 0) | ||||||
|  |                     color = light_red | ||||||
|  |                     if status == "MATCH!": | ||||||
|  |                         color = green | ||||||
|  |                     text = "Result: " + status + " " | ||||||
|  |                     img, line_width = draw_text_in_image(img, text, (margin + line_width, v_pos), color, line_width) | ||||||
|  | 
 | ||||||
|  |                     font = cv2.FONT_HERSHEY_SIMPLEX | ||||||
|  |                     if ovmax > 0: | ||||||
|  |                         bbgt = [int(round(float(x))) for x in gt_match["bbox"].split()] | ||||||
|  |                         cv2.rectangle(img, (bbgt[0], bbgt[1]), (bbgt[2], bbgt[3]), light_blue, 2) | ||||||
|  |                         cv2.rectangle(img_cumulative, (bbgt[0], bbgt[1]), (bbgt[2], bbgt[3]), light_blue, 2) | ||||||
|  |                         cv2.putText(img_cumulative, class_name, (bbgt[0], bbgt[1] - 5), font, 0.6, light_blue, 1, | ||||||
|  |                                     cv2.LINE_AA) | ||||||
|  |                     bb = [int(i) for i in bb] | ||||||
|  |                     cv2.rectangle(img, (bb[0], bb[1]), (bb[2], bb[3]), color, 2) | ||||||
|  |                     cv2.rectangle(img_cumulative, (bb[0], bb[1]), (bb[2], bb[3]), color, 2) | ||||||
|  |                     cv2.putText(img_cumulative, class_name, (bb[0], bb[1] - 5), font, 0.6, color, 1, cv2.LINE_AA) | ||||||
|  | 
 | ||||||
|  |                     cv2.imshow("Animation", img) | ||||||
|  |                     cv2.waitKey(20) | ||||||
|  |                     output_img_path = RESULTS_FILES_PATH + "/images/detections_one_by_one/" + class_name + "_detection" + str( | ||||||
|  |                         idx) + ".jpg" | ||||||
|  |                     cv2.imwrite(output_img_path, img) | ||||||
|  |                     cv2.imwrite(img_cumulative_path, img_cumulative) | ||||||
|  | 
 | ||||||
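|  |             # Running sums turn the per-detection 0/1 flags into cumulative | ||||||
|  |             # FP/TP counts at every rank; recall and precision follow from them. | ||||||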
|  |             cumsum = 0 | ||||||
|  |             for idx, val in enumerate(fp): | ||||||
|  |                 fp[idx] += cumsum | ||||||
|  |                 cumsum += val | ||||||
|  | 
 | ||||||
|  |             cumsum = 0 | ||||||
|  |             for idx, val in enumerate(tp): | ||||||
|  |                 tp[idx] += cumsum | ||||||
|  |                 cumsum += val | ||||||
|  | 
 | ||||||
|  |             rec = tp[:] | ||||||
|  |             for idx, val in enumerate(tp): | ||||||
|  |                 rec[idx] = float(tp[idx]) / np.maximum(gt_counter_per_class[class_name], 1) | ||||||
|  | 
 | ||||||
|  |             prec = tp[:] | ||||||
|  |             for idx, val in enumerate(tp): | ||||||
|  |                 prec[idx] = float(tp[idx]) / np.maximum((fp[idx] + tp[idx]), 1) | ||||||
|  | 
 | ||||||
|  |             ap, mrec, mprec = voc_ap(rec[:], prec[:]) | ||||||
|  |             F1 = np.array(rec) * np.array(prec) * 2 / np.where((np.array(prec) + np.array(rec)) == 0, 1, | ||||||
|  |                                                                (np.array(prec) + np.array(rec))) | ||||||
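|  |             # F1 = 2 * P * R / (P + R); np.where guards against division by | ||||||
|  |             # zero at ranks where precision and recall are both zero. | ||||||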
|  | 
 | ||||||
|  |             sum_AP += ap | ||||||
|  |             text = "{0:.2f}%".format( | ||||||
|  |                 ap * 100) + " = " + class_name + " AP "  # class_name + " AP = {0:.2f}%".format(ap*100) | ||||||
|  | 
 | ||||||
|  |             if len(prec) > 0: | ||||||
|  |                 F1_text = "{0:.2f}".format(F1[score_threhold_idx]) + " = " + class_name + " F1 " | ||||||
|  |                 Recall_text = "{0:.2f}%".format(rec[score_threhold_idx] * 100) + " = " + class_name + " Recall " | ||||||
|  |                 Precision_text = "{0:.2f}%".format(prec[score_threhold_idx] * 100) + " = " + class_name + " Precision " | ||||||
|  |             else: | ||||||
|  |                 F1_text = "0.00" + " = " + class_name + " F1 " | ||||||
|  |                 Recall_text = "0.00%" + " = " + class_name + " Recall " | ||||||
|  |                 Precision_text = "0.00%" + " = " + class_name + " Precision " | ||||||
|  | 
 | ||||||
|  |             rounded_prec = ['%.2f' % elem for elem in prec] | ||||||
|  |             rounded_rec = ['%.2f' % elem for elem in rec] | ||||||
|  |             results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall: " + str(rounded_rec) + "\n\n") | ||||||
|  | 
 | ||||||
|  |             if len(prec) > 0: | ||||||
|  |                 print(text + "\t||\tscore_threshold=" + str(score_threhold) | ||||||
|  |                       + " : F1=" + "{0:.2f}".format(F1[score_threhold_idx]) | ||||||
|  |                       + " ; Recall=" + "{0:.2f}%".format(rec[score_threhold_idx] * 100) | ||||||
|  |                       + " ; Precision=" + "{0:.2f}%".format(prec[score_threhold_idx] * 100)) | ||||||
|  |             else: | ||||||
|  |                 print(text + "\t||\tscore_threshold=" + str(score_threhold) | ||||||
|  |                       + " : F1=0.00 ; Recall=0.00% ; Precision=0.00%") | ||||||
|  |             ap_dictionary[class_name] = ap | ||||||
|  | 
 | ||||||
|  |             n_images = counter_images_per_class[class_name] | ||||||
|  |             lamr, mr, fppi = log_average_miss_rate(np.array(rec), np.array(fp), n_images) | ||||||
|  |             lamr_dictionary[class_name] = lamr | ||||||
|  | 
 | ||||||
|  |             if draw_plot: | ||||||
|  |                 plt.plot(rec, prec, '-o') | ||||||
|  |                 area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]] | ||||||
|  |                 area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]] | ||||||
|  |                 plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r') | ||||||
|  | 
 | ||||||
|  |                 fig = plt.gcf() | ||||||
|  |                 fig.canvas.set_window_title('AP ' + class_name) | ||||||
|  | 
 | ||||||
|  |                 plt.title('class: ' + text) | ||||||
|  |                 plt.xlabel('Recall') | ||||||
|  |                 plt.ylabel('Precision') | ||||||
|  |                 axes = plt.gca() | ||||||
|  |                 axes.set_xlim([0.0, 1.0]) | ||||||
|  |                 axes.set_ylim([0.0, 1.05]) | ||||||
|  |                 fig.savefig(RESULTS_FILES_PATH + "/AP/" + class_name + ".png") | ||||||
|  |                 plt.cla() | ||||||
|  | 
 | ||||||
|  |                 plt.plot(score, F1, "-", color='orangered') | ||||||
|  |                 plt.title('class: ' + F1_text + "\nscore_threshold=" + str(score_threhold)) | ||||||
|  |                 plt.xlabel('Score_Threshold') | ||||||
|  |                 plt.ylabel('F1') | ||||||
|  |                 axes = plt.gca() | ||||||
|  |                 axes.set_xlim([0.0, 1.0]) | ||||||
|  |                 axes.set_ylim([0.0, 1.05]) | ||||||
|  |                 fig.savefig(RESULTS_FILES_PATH + "/F1/" + class_name + ".png") | ||||||
|  |                 plt.cla() | ||||||
|  | 
 | ||||||
|  |                 plt.plot(score, rec, "-H", color='gold') | ||||||
|  |                 plt.title('class: ' + Recall_text + "\nscore_threshold=" + str(score_threhold)) | ||||||
|  |                 plt.xlabel('Score_Threshold') | ||||||
|  |                 plt.ylabel('Recall') | ||||||
|  |                 axes = plt.gca() | ||||||
|  |                 axes.set_xlim([0.0, 1.0]) | ||||||
|  |                 axes.set_ylim([0.0, 1.05]) | ||||||
|  |                 fig.savefig(RESULTS_FILES_PATH + "/Recall/" + class_name + ".png") | ||||||
|  |                 plt.cla() | ||||||
|  | 
 | ||||||
|  |                 plt.plot(score, prec, "-s", color='palevioletred') | ||||||
|  |                 plt.title('class: ' + Precision_text + "\nscore_threshold=" + str(score_threhold)) | ||||||
|  |                 plt.xlabel('Score_Threshold') | ||||||
|  |                 plt.ylabel('Precision') | ||||||
|  |                 axes = plt.gca() | ||||||
|  |                 axes.set_xlim([0.0, 1.0]) | ||||||
|  |                 axes.set_ylim([0.0, 1.05]) | ||||||
|  |                 fig.savefig(RESULTS_FILES_PATH + "/Precision/" + class_name + ".png") | ||||||
|  |                 plt.cla() | ||||||
|  | 
 | ||||||
|  |         if show_animation: | ||||||
|  |             cv2.destroyAllWindows() | ||||||
|  |         if n_classes == 0: | ||||||
|  |             print("No classes were detected. Check the label files and whether classes_path in get_map.py has been modified.") | ||||||
|  |             return 0 | ||||||
|  |         results_file.write("\n# mAP of all classes\n") | ||||||
|  |         mAP = sum_AP / n_classes | ||||||
|  |         text = "mAP = {0:.2f}%".format(mAP * 100) | ||||||
|  |         results_file.write(text + "\n") | ||||||
|  |         print(text) | ||||||
|  | 
 | ||||||
|  |     shutil.rmtree(TEMP_FILES_PATH) | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  |     Count the total number of detection-results | ||||||
|  |     """ | ||||||
|  |     det_counter_per_class = {} | ||||||
|  |     for txt_file in dr_files_list: | ||||||
|  |         lines_list = file_lines_to_list(txt_file) | ||||||
|  |         for line in lines_list: | ||||||
|  |             class_name = line.split()[0] | ||||||
|  |             if class_name in det_counter_per_class: | ||||||
|  |                 det_counter_per_class[class_name] += 1 | ||||||
|  |             else: | ||||||
|  |                 det_counter_per_class[class_name] = 1 | ||||||
|  |     dr_classes = list(det_counter_per_class.keys()) | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  |     Write number of ground-truth objects per class to results.txt | ||||||
|  |     """ | ||||||
|  |     with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file: | ||||||
|  |         results_file.write("\n# Number of ground-truth objects per class\n") | ||||||
|  |         for class_name in sorted(gt_counter_per_class): | ||||||
|  |             results_file.write(class_name + ": " + str(gt_counter_per_class[class_name]) + "\n") | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  |     Finish counting true positives | ||||||
|  |     """ | ||||||
|  |     for class_name in dr_classes: | ||||||
|  |         if class_name not in gt_classes: | ||||||
|  |             count_true_positives[class_name] = 0 | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  |     Write number of detected objects per class to results.txt | ||||||
|  |     """ | ||||||
|  |     with open(RESULTS_FILES_PATH + "/results.txt", 'a') as results_file: | ||||||
|  |         results_file.write("\n# Number of detected objects per class\n") | ||||||
|  |         for class_name in sorted(dr_classes): | ||||||
|  |             n_det = det_counter_per_class[class_name] | ||||||
|  |             text = class_name + ": " + str(n_det) | ||||||
|  |             text += " (tp:" + str(count_true_positives[class_name]) | ||||||
|  |             text += ", fp:" + str(n_det - count_true_positives[class_name]) + ")\n" | ||||||
|  |             results_file.write(text) | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  |     Plot the total number of occurrences of each class in the ground-truth | ||||||
|  |     """ | ||||||
|  |     if draw_plot: | ||||||
|  |         window_title = "ground-truth-info" | ||||||
|  |         plot_title = "ground-truth\n" | ||||||
|  |         plot_title += "(" + str(len(ground_truth_files_list)) + " files and " + str(n_classes) + " classes)" | ||||||
|  |         x_label = "Number of objects per class" | ||||||
|  |         output_path = RESULTS_FILES_PATH + "/ground-truth-info.png" | ||||||
|  |         to_show = False | ||||||
|  |         plot_color = 'forestgreen' | ||||||
|  |         draw_plot_func( | ||||||
|  |             gt_counter_per_class, | ||||||
|  |             n_classes, | ||||||
|  |             window_title, | ||||||
|  |             plot_title, | ||||||
|  |             x_label, | ||||||
|  |             output_path, | ||||||
|  |             to_show, | ||||||
|  |             plot_color, | ||||||
|  |             '', | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |     # """ | ||||||
|  |     # Plot the total number of occurrences of each class in the "detection-results" folder | ||||||
|  |     # """ | ||||||
|  |     # if draw_plot: | ||||||
|  |     #     window_title = "detection-results-info" | ||||||
|  |     #     # Plot title | ||||||
|  |     #     plot_title = "detection-results\n" | ||||||
|  |     #     plot_title += "(" + str(len(dr_files_list)) + " files and " | ||||||
|  |     #     count_non_zero_values_in_dictionary = sum(int(x) > 0 for x in list(det_counter_per_class.values())) | ||||||
|  |     #     plot_title += str(count_non_zero_values_in_dictionary) + " detected classes)" | ||||||
|  |     #     # end Plot title | ||||||
|  |     #     x_label = "Number of objects per class" | ||||||
|  |     #     output_path = RESULTS_FILES_PATH + "/detection-results-info.png" | ||||||
|  |     #     to_show = False | ||||||
|  |     #     plot_color = 'forestgreen' | ||||||
|  |     #     true_p_bar = count_true_positives | ||||||
|  |     #     draw_plot_func( | ||||||
|  |     #         det_counter_per_class, | ||||||
|  |     #         len(det_counter_per_class), | ||||||
|  |     #         window_title, | ||||||
|  |     #         plot_title, | ||||||
|  |     #         x_label, | ||||||
|  |     #         output_path, | ||||||
|  |     #         to_show, | ||||||
|  |     #         plot_color, | ||||||
|  |     #         true_p_bar | ||||||
|  |     #         ) | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  |     Draw log-average miss rate plot (Show lamr of all classes in decreasing order) | ||||||
|  |     """ | ||||||
|  |     if draw_plot: | ||||||
|  |         window_title = "lamr" | ||||||
|  |         plot_title = "log-average miss rate" | ||||||
|  |         x_label = "log-average miss rate" | ||||||
|  |         output_path = RESULTS_FILES_PATH + "/lamr.png" | ||||||
|  |         to_show = False | ||||||
|  |         plot_color = 'royalblue' | ||||||
|  |         draw_plot_func( | ||||||
|  |             lamr_dictionary, | ||||||
|  |             n_classes, | ||||||
|  |             window_title, | ||||||
|  |             plot_title, | ||||||
|  |             x_label, | ||||||
|  |             output_path, | ||||||
|  |             to_show, | ||||||
|  |             plot_color, | ||||||
|  |             "" | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |     """ | ||||||
|  |     Draw mAP plot (Show APs of all classes in decreasing order) | ||||||
|  |     """ | ||||||
|  |     if draw_plot: | ||||||
|  |         window_title = "mAP" | ||||||
|  |         plot_title = "mAP = {0:.2f}%".format(mAP * 100) | ||||||
|  |         x_label = "Average Precision" | ||||||
|  |         output_path = RESULTS_FILES_PATH + "/mAP.png" | ||||||
|  |         to_show = True | ||||||
|  |         plot_color = 'royalblue' | ||||||
|  |         draw_plot_func( | ||||||
|  |             ap_dictionary, | ||||||
|  |             n_classes, | ||||||
|  |             window_title, | ||||||
|  |             plot_title, | ||||||
|  |             x_label, | ||||||
|  |             output_path, | ||||||
|  |             to_show, | ||||||
|  |             plot_color, | ||||||
|  |             "" | ||||||
|  |         ) | ||||||
|  |     return mAP | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def preprocess_gt(gt_path, class_names): | ||||||
|  |     image_ids = os.listdir(gt_path) | ||||||
|  |     results = {} | ||||||
|  | 
 | ||||||
|  |     images = [] | ||||||
|  |     bboxes = [] | ||||||
|  |     for i, image_id in enumerate(image_ids): | ||||||
|  |         lines_list = file_lines_to_list(os.path.join(gt_path, image_id)) | ||||||
|  |         boxes_per_image = [] | ||||||
|  |         image = {} | ||||||
|  |         image_id = os.path.splitext(image_id)[0] | ||||||
|  |         image['file_name'] = image_id + '.jpg' | ||||||
|  |         image['width'] = 1 | ||||||
|  |         image['height'] = 1 | ||||||
|  |         # -----------------------------------------------------------------# | ||||||
|  |         #   Thanks to 多学学英语吧 for the tip, which fixed the | ||||||
|  |         #   'Results do not correspond to current coco set' error. | ||||||
|  |         # -----------------------------------------------------------------# | ||||||
|  |         image['id'] = str(image_id) | ||||||
|  | 
 | ||||||
|  |         for line in lines_list: | ||||||
|  |             difficult = 0 | ||||||
|  |             if "difficult" in line: | ||||||
|  |                 line_split = line.split() | ||||||
|  |                 left, top, right, bottom, _difficult = line_split[-5:] | ||||||
|  |                 class_name = "" | ||||||
|  |                 for name in line_split[:-5]: | ||||||
|  |                     class_name += name + " " | ||||||
|  |                 class_name = class_name[:-1] | ||||||
|  |                 difficult = 1 | ||||||
|  |             else: | ||||||
|  |                 line_split = line.split() | ||||||
|  |                 left, top, right, bottom = line_split[-4:] | ||||||
|  |                 class_name = "" | ||||||
|  |                 for name in line_split[:-4]: | ||||||
|  |                     class_name += name + " " | ||||||
|  |                 class_name = class_name[:-1] | ||||||
|  | 
 | ||||||
|  |             left, top, right, bottom = float(left), float(top), float(right), float(bottom) | ||||||
|  |             if class_name not in class_names: | ||||||
|  |                 continue | ||||||
|  |             cls_id = class_names.index(class_name) + 1 | ||||||
|  |             bbox = [left, top, right - left, bottom - top, difficult, str(image_id), cls_id, | ||||||
|  |                     (right - left) * (bottom - top) - 10.0] | ||||||
|  |             boxes_per_image.append(bbox) | ||||||
|  |         images.append(image) | ||||||
|  |         bboxes.extend(boxes_per_image) | ||||||
|  |     results['images'] = images | ||||||
|  | 
 | ||||||
|  |     categories = [] | ||||||
|  |     for i, cls in enumerate(class_names): | ||||||
|  |         category = {} | ||||||
|  |         category['supercategory'] = cls | ||||||
|  |         category['name'] = cls | ||||||
|  |         category['id'] = i + 1 | ||||||
|  |         categories.append(category) | ||||||
|  |     results['categories'] = categories | ||||||
|  | 
 | ||||||
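|  |     # COCO-style annotation records: 'bbox' is [x, y, width, height] and | ||||||
|  |     # 'iscrowd' reuses the VOC 'difficult' flag parsed above. | ||||||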
|  |     annotations = [] | ||||||
|  |     for i, box in enumerate(bboxes): | ||||||
|  |         annotation = {} | ||||||
|  |         annotation['area'] = box[-1] | ||||||
|  |         annotation['category_id'] = box[-2] | ||||||
|  |         annotation['image_id'] = box[-3] | ||||||
|  |         annotation['iscrowd'] = box[-4] | ||||||
|  |         annotation['bbox'] = box[:4] | ||||||
|  |         annotation['id'] = i | ||||||
|  |         annotations.append(annotation) | ||||||
|  |     results['annotations'] = annotations | ||||||
|  |     return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def preprocess_dr(dr_path, class_names): | ||||||
|  |     image_ids = os.listdir(dr_path) | ||||||
|  |     results = [] | ||||||
|  |     for image_id in image_ids: | ||||||
|  |         lines_list = file_lines_to_list(os.path.join(dr_path, image_id)) | ||||||
|  |         image_id = os.path.splitext(image_id)[0] | ||||||
|  |         for line in lines_list: | ||||||
|  |             line_split = line.split() | ||||||
|  |             confidence, left, top, right, bottom = line_split[-5:] | ||||||
|  |             class_name = "" | ||||||
|  |             for name in line_split[:-5]: | ||||||
|  |                 class_name += name + " " | ||||||
|  |             class_name = class_name[:-1] | ||||||
|  |             left, top, right, bottom = float(left), float(top), float(right), float(bottom) | ||||||
|  |             result = {} | ||||||
|  |             result["image_id"] = str(image_id) | ||||||
|  |             if class_name not in class_names: | ||||||
|  |                 continue | ||||||
|  |             result["category_id"] = class_names.index(class_name) + 1 | ||||||
|  |             result["bbox"] = [left, top, right - left, bottom - top] | ||||||
|  |             result["score"] = float(confidence) | ||||||
|  |             results.append(result) | ||||||
|  |     return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def get_coco_map(class_names, path): | ||||||
|  |     GT_PATH = os.path.join(path, 'ground-truth') | ||||||
|  |     DR_PATH = os.path.join(path, 'detection-results') | ||||||
|  |     COCO_PATH = os.path.join(path, 'coco_eval') | ||||||
|  | 
 | ||||||
|  |     if not os.path.exists(COCO_PATH): | ||||||
|  |         os.makedirs(COCO_PATH) | ||||||
|  | 
 | ||||||
|  |     GT_JSON_PATH = os.path.join(COCO_PATH, 'instances_gt.json') | ||||||
|  |     DR_JSON_PATH = os.path.join(COCO_PATH, 'instances_dr.json') | ||||||
|  | 
 | ||||||
|  |     with open(GT_JSON_PATH, "w") as f: | ||||||
|  |         results_gt = preprocess_gt(GT_PATH, class_names) | ||||||
|  |         json.dump(results_gt, f, indent=4) | ||||||
|  | 
 | ||||||
|  |     with open(DR_JSON_PATH, "w") as f: | ||||||
|  |         results_dr = preprocess_dr(DR_PATH, class_names) | ||||||
|  |         json.dump(results_dr, f, indent=4) | ||||||
|  |         if len(results_dr) == 0: | ||||||
|  |             print("No objects were detected.") | ||||||
|  |             return [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] | ||||||
|  | 
 | ||||||
|  |     cocoGt = COCO(GT_JSON_PATH) | ||||||
|  |     cocoDt = cocoGt.loadRes(DR_JSON_PATH) | ||||||
|  |     cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') | ||||||
|  |     cocoEval.evaluate() | ||||||
|  |     cocoEval.accumulate() | ||||||
|  |     cocoEval.summarize() | ||||||
|  | 
 | ||||||
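|  |     # cocoEval.stats is the standard 12-number COCO bbox summary: | ||||||
|  |     # [AP@[.5:.95], AP50, AP75, AP_S, AP_M, AP_L, AR@1, AR@10, AR@100, AR_S, AR_M, AR_L] | ||||||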
|  |     return cocoEval.stats | ||||||
117  utils_coco/coco_annotation.py  Normal file
							| @ -0,0 +1,117 @@ | |||||||
|  | # -------------------------------------------------------# | ||||||
|  | #   Generate the txt files used for training from the COCO json annotations | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | import json | ||||||
|  | import os | ||||||
|  | from collections import defaultdict | ||||||
|  | 
 | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | #   Paths to the COCO training-set and validation-set images | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | train_datasets_path = "coco_dataset/train2017" | ||||||
|  | val_datasets_path = "coco_dataset/val2017" | ||||||
|  | 
 | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | #   Paths to the COCO training-set and validation-set annotation files | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | train_annotation_path = "coco_dataset/annotations/instances_train2017.json" | ||||||
|  | val_annotation_path = "coco_dataset/annotations/instances_val2017.json" | ||||||
|  | 
 | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | #   Paths of the generated txt files | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | train_output_path = "coco_train.txt" | ||||||
|  | val_output_path = "coco_val.txt" | ||||||
|  | 
 | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     name_box_id = defaultdict(list) | ||||||
|  |     id_name = dict() | ||||||
|  |     f = open(train_annotation_path, encoding='utf-8') | ||||||
|  |     data = json.load(f) | ||||||
|  | 
 | ||||||
|  |     annotations = data['annotations'] | ||||||
|  |     for ant in annotations: | ||||||
|  |         image_id = ant['image_id'] | ||||||
|  |         name = os.path.join(train_datasets_path, '%012d.jpg' % image_id) | ||||||
|  |         cat = ant['category_id'] | ||||||
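|  |         # COCO category ids are sparse (1-90 with gaps); the chain below | ||||||
|  |         # remaps them to the contiguous 0-79 range used for training. | ||||||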
|  |         if cat >= 1 and cat <= 11: | ||||||
|  |             cat = cat - 1 | ||||||
|  |         elif cat >= 13 and cat <= 25: | ||||||
|  |             cat = cat - 2 | ||||||
|  |         elif cat >= 27 and cat <= 28: | ||||||
|  |             cat = cat - 3 | ||||||
|  |         elif cat >= 31 and cat <= 44: | ||||||
|  |             cat = cat - 5 | ||||||
|  |         elif cat >= 46 and cat <= 65: | ||||||
|  |             cat = cat - 6 | ||||||
|  |         elif cat == 67: | ||||||
|  |             cat = cat - 7 | ||||||
|  |         elif cat == 70: | ||||||
|  |             cat = cat - 9 | ||||||
|  |         elif cat >= 72 and cat <= 82: | ||||||
|  |             cat = cat - 10 | ||||||
|  |         elif cat >= 84 and cat <= 90: | ||||||
|  |             cat = cat - 11 | ||||||
|  |         name_box_id[name].append([ant['bbox'], cat]) | ||||||
|  | 
 | ||||||
|  |     f = open(train_output_path, 'w') | ||||||
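|  |     # Each output line is the image path followed by space-separated boxes, | ||||||
|  |     # each encoded as "x_min,y_min,x_max,y_max,class_id". | ||||||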
|  |     for key in name_box_id.keys(): | ||||||
|  |         f.write(key) | ||||||
|  |         box_infos = name_box_id[key] | ||||||
|  |         for info in box_infos: | ||||||
|  |             x_min = int(info[0][0]) | ||||||
|  |             y_min = int(info[0][1]) | ||||||
|  |             x_max = x_min + int(info[0][2]) | ||||||
|  |             y_max = y_min + int(info[0][3]) | ||||||
|  | 
 | ||||||
|  |             box_info = " %d,%d,%d,%d,%d" % ( | ||||||
|  |                 x_min, y_min, x_max, y_max, int(info[1])) | ||||||
|  |             f.write(box_info) | ||||||
|  |         f.write('\n') | ||||||
|  |     f.close() | ||||||
|  | 
 | ||||||
|  |     name_box_id = defaultdict(list) | ||||||
|  |     id_name = dict() | ||||||
|  |     f = open(val_annotation_path, encoding='utf-8') | ||||||
|  |     data = json.load(f) | ||||||
|  | 
 | ||||||
|  |     annotations = data['annotations'] | ||||||
|  |     for ant in annotations: | ||||||
|  |         image_id = ant['image_id'] | ||||||
|  |         name = os.path.join(val_datasets_path, '%012d.jpg' % image_id) | ||||||
|  |         cat = ant['category_id'] | ||||||
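|  |         # Same sparse-to-contiguous category-id remap as for the training set. | ||||||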
|  |         if cat >= 1 and cat <= 11: | ||||||
|  |             cat = cat - 1 | ||||||
|  |         elif cat >= 13 and cat <= 25: | ||||||
|  |             cat = cat - 2 | ||||||
|  |         elif cat >= 27 and cat <= 28: | ||||||
|  |             cat = cat - 3 | ||||||
|  |         elif cat >= 31 and cat <= 44: | ||||||
|  |             cat = cat - 5 | ||||||
|  |         elif cat >= 46 and cat <= 65: | ||||||
|  |             cat = cat - 6 | ||||||
|  |         elif cat == 67: | ||||||
|  |             cat = cat - 7 | ||||||
|  |         elif cat == 70: | ||||||
|  |             cat = cat - 9 | ||||||
|  |         elif cat >= 72 and cat <= 82: | ||||||
|  |             cat = cat - 10 | ||||||
|  |         elif cat >= 84 and cat <= 90: | ||||||
|  |             cat = cat - 11 | ||||||
|  |         name_box_id[name].append([ant['bbox'], cat]) | ||||||
|  | 
 | ||||||
|  |     f = open(val_output_path, 'w') | ||||||
|  |     for key in name_box_id.keys(): | ||||||
|  |         f.write(key) | ||||||
|  |         box_infos = name_box_id[key] | ||||||
|  |         for info in box_infos: | ||||||
|  |             x_min = int(info[0][0]) | ||||||
|  |             y_min = int(info[0][1]) | ||||||
|  |             x_max = x_min + int(info[0][2]) | ||||||
|  |             y_max = y_min + int(info[0][3]) | ||||||
|  | 
 | ||||||
|  |             box_info = " %d,%d,%d,%d,%d" % ( | ||||||
|  |                 x_min, y_min, x_max, y_max, int(info[1])) | ||||||
|  |             f.write(box_info) | ||||||
|  |         f.write('\n') | ||||||
|  |     f.close() | ||||||
116  utils_coco/get_map_coco.py  Normal file
							| @ -0,0 +1,116 @@ | |||||||
|  | import json | ||||||
|  | import os | ||||||
|  | 
 | ||||||
|  | import numpy as np | ||||||
|  | import torch | ||||||
|  | from PIL import Image | ||||||
|  | from pycocotools.coco import COCO | ||||||
|  | from pycocotools.cocoeval import COCOeval | ||||||
|  | from tqdm import tqdm | ||||||
|  | 
 | ||||||
|  | from utils.utils import cvtColor, preprocess_input, resize_image | ||||||
|  | from yolo import YOLO | ||||||
|  | 
 | ||||||
|  | # ---------------------------------------------------------------------------# | ||||||
|  | #   map_mode selects what this script computes when run: | ||||||
|  | #   map_mode = 0: the whole mAP pipeline, i.e. obtain predictions and compute the mAP. | ||||||
|  | #   map_mode = 1: only obtain the predictions. | ||||||
|  | #   map_mode = 2: only compute the mAP. | ||||||
|  | # ---------------------------------------------------------------------------# | ||||||
|  | map_mode = 0 | ||||||
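|  | # e.g. set map_mode = 2 to re-run COCOeval on an existing eval_results.json | ||||||
|  | # without repeating the detection step | ||||||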
|  | # -------------------------------------------------------# | ||||||
|  | #   Paths to the validation-set annotations and images | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | cocoGt_path = 'coco_dataset/annotations/instances_val2017.json' | ||||||
|  | dataset_img_path = 'coco_dataset/val2017' | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | #   Output folder for the results; defaults to map_out | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | temp_save_path = 'map_out/coco_eval' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class mAP_YOLO(YOLO): | ||||||
|  |     # ---------------------------------------------------# | ||||||
|  |     #   Detect an image | ||||||
|  |     # ---------------------------------------------------# | ||||||
|  |     def detect_image(self, image_id, image, results): | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         #   Record the height and width of the input image | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         image_shape = np.array(np.shape(image)[0:2]) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Convert the image to RGB here to avoid errors when predicting on grayscale images. | ||||||
|  |         #   The code only supports prediction on RGB images; all other types are converted to RGB. | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image = cvtColor(image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Add gray bars to the image for a distortion-free resize. | ||||||
|  |         #   A direct resize can also be used for detection. | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Add the batch_size dimension | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) | ||||||
|  | 
 | ||||||
|  |         with torch.no_grad(): | ||||||
|  |             images = torch.from_numpy(image_data) | ||||||
|  |             if self.cuda: | ||||||
|  |                 images = images.cuda() | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             #   Feed the image into the network for prediction! | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             outputs = self.net(images) | ||||||
|  |             outputs = self.bbox_util.decode_box(outputs) | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             #   Stack the prediction boxes, then apply non-maximum suppression | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             outputs = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, | ||||||
|  |                                                          image_shape, self.letterbox_image, conf_thres=self.confidence, | ||||||
|  |                                                          nms_thres=self.nms_iou) | ||||||
|  | 
 | ||||||
|  |             if outputs[0] is None: | ||||||
|  |                 return results | ||||||
|  | 
 | ||||||
|  |             top_label = np.array(outputs[0][:, 6], dtype='int32') | ||||||
|  |             top_conf = outputs[0][:, 4] * outputs[0][:, 5] | ||||||
|  |             top_boxes = outputs[0][:, :4] | ||||||
|  | 
 | ||||||
|  |         for i, c in enumerate(top_label): | ||||||
|  |             result = {} | ||||||
|  |             top, left, bottom, right = top_boxes[i] | ||||||
|  | 
 | ||||||
|  |             result["image_id"] = int(image_id) | ||||||
|  |             result["category_id"] = clsid2catid[c] | ||||||
|  |             result["bbox"] = [float(left), float(top), float(right - left), float(bottom - top)] | ||||||
|  |             result["score"] = float(top_conf[i]) | ||||||
|  |             results.append(result) | ||||||
|  |         return results | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     if not os.path.exists(temp_save_path): | ||||||
|  |         os.makedirs(temp_save_path) | ||||||
|  | 
 | ||||||
|  |     cocoGt = COCO(cocoGt_path) | ||||||
|  |     ids = list(cocoGt.imgToAnns.keys()) | ||||||
|  |     clsid2catid = cocoGt.getCatIds() | ||||||
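|  |     # getCatIds() returns the sorted COCO category ids, so clsid2catid maps | ||||||
|  |     # the network's contiguous class index back to the original category id. | ||||||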
|  | 
 | ||||||
|  |     if map_mode == 0 or map_mode == 1: | ||||||
|  |         yolo = mAP_YOLO(confidence=0.001, nms_iou=0.65) | ||||||
|  | 
 | ||||||
|  |         with open(os.path.join(temp_save_path, 'eval_results.json'), "w") as f: | ||||||
|  |             results = [] | ||||||
|  |             for image_id in tqdm(ids): | ||||||
|  |                 image_path = os.path.join(dataset_img_path, cocoGt.loadImgs(image_id)[0]['file_name']) | ||||||
|  |                 image = Image.open(image_path) | ||||||
|  |                 results = yolo.detect_image(image_id, image, results) | ||||||
|  |             json.dump(results, f) | ||||||
|  | 
 | ||||||
|  |     if map_mode == 0 or map_mode == 2: | ||||||
|  |         cocoDt = cocoGt.loadRes(os.path.join(temp_save_path, 'eval_results.json')) | ||||||
|  |         cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') | ||||||
|  |         cocoEval.evaluate() | ||||||
|  |         cocoEval.accumulate() | ||||||
|  |         cocoEval.summarize() | ||||||
|  |         print("Get map done.") | ||||||
158  voc_annotation.py  Normal file
							| @ -0,0 +1,158 @@ | |||||||
|  | import os | ||||||
|  | import random | ||||||
|  | import xml.etree.ElementTree as ET | ||||||
|  | 
 | ||||||
|  | import numpy as np | ||||||
|  | 
 | ||||||
|  | from utils.utils import get_classes | ||||||
|  | 
 | ||||||
|  | # --------------------------------------------------------------------------------------------------------------------------------# | ||||||
|  | #   annotation_mode selects what this script does when run: | ||||||
|  | #   annotation_mode = 0: the whole labeling pipeline, i.e. the txt files in VOCdevkit/VOC2007/ImageSets plus the 2007_train.txt and 2007_val.txt used for training | ||||||
|  | #   annotation_mode = 1: only the txt files in VOCdevkit/VOC2007/ImageSets | ||||||
|  | #   annotation_mode = 2: only the 2007_train.txt and 2007_val.txt used for training | ||||||
|  | # --------------------------------------------------------------------------------------------------------------------------------# | ||||||
|  | annotation_mode = 0 | ||||||
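|  | # e.g. set annotation_mode = 2 to regenerate only 2007_train.txt and 2007_val.txt | ||||||
|  | # after changing classes_path, keeping the existing ImageSets split | ||||||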
|  | # -------------------------------------------------------------------# | ||||||
|  | #   Must be modified: generates the object information in 2007_train.txt and 2007_val.txt. | ||||||
|  | #   Keep it identical to the classes_path used for training and prediction. | ||||||
|  | #   If the generated 2007_train.txt contains no object information, | ||||||
|  | #   the classes were not set correctly. | ||||||
|  | #   Only effective when annotation_mode is 0 or 2. | ||||||
|  | # -------------------------------------------------------------------# | ||||||
|  | classes_path = 'model_data/voc_classes.txt'  # the object names from the xml files; their order here is the one-hot order used in training | ||||||
|  | # --------------------------------------------------------------------------------------------------------------------------------# | ||||||
|  | #   trainval_percent sets the ratio of (train + val) to test; by default (train + val) : test = 9 : 1 | ||||||
|  | #   train_percent sets the ratio of train to val within (train + val); by default train : val = 9 : 1 | ||||||
|  | #   Only effective when annotation_mode is 0 or 1. | ||||||
|  | # --------------------------------------------------------------------------------------------------------------------------------# | ||||||
|  | trainval_percent = 0.9 | ||||||
|  | train_percent = 0.9 | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | #   Points to the folder containing the VOC dataset. | ||||||
|  | #   Defaults to the VOC dataset in the repository root. | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | VOCdevkit_path = 'VOCdevkit' | ||||||
|  | 
 | ||||||
|  | VOCdevkit_sets = [('2007', 'train'), ('2007', 'val')] | ||||||
|  | classes, _ = get_classes(classes_path) | ||||||
|  | 
 | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | #   Object statistics | ||||||
|  | # -------------------------------------------------------# | ||||||
|  | photo_nums = np.zeros(len(VOCdevkit_sets))  # number of train images and of val images | ||||||
|  | nums = np.zeros(len(classes))  # per-class object counts | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def convert_annotation(year, image_id, list_file): | ||||||
|  |     in_file = open(os.path.join(VOCdevkit_path, 'VOC%s/Annotations/%s.xml' % (year, image_id)), encoding='utf-8')  # 'VOCdevkit\\VOC2007/Annotations/000001.xml' | ||||||
|  |     tree = ET.parse(in_file) | ||||||
|  |     root = tree.getroot() | ||||||
|  | 
 | ||||||
|  |     for obj in root.iter('object'): | ||||||
|  |         difficult = 0 | ||||||
|  |         if obj.find('difficult') is not None: | ||||||
|  |             difficult = obj.find('difficult').text | ||||||
|  |         cls = obj.find('name').text | ||||||
|  |         if cls not in classes or int(difficult) == 1:  # skip objects whose class is not in classes or whose difficult flag is 1 | ||||||
|  |             continue | ||||||
|  |         cls_id = classes.index(cls)  # the class id is the index of the class in the classes file | ||||||
|  |         xmlbox = obj.find('bndbox') | ||||||
|  |         b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), | ||||||
|  |              int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text))) | ||||||
|  |         list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id)) | ||||||
|  |         # each line of list_file: the full image path, a space, then each object's comma-separated coordinates and class id | ||||||
|  |         nums[classes.index(cls)] = nums[classes.index(cls)] + 1  # count objects per class | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | if __name__ == "__main__": | ||||||
|  |     random.seed(0) | ||||||
|  |     if " " in os.path.abspath(VOCdevkit_path): | ||||||
|  |         raise ValueError("The dataset folder path and the image file names must not contain spaces, otherwise training will break; please rename them.") | ||||||
|  | 
 | ||||||
|  |     if annotation_mode == 0 or annotation_mode == 1: | ||||||
|  |         print("Generate txt in ImageSets.") | ||||||
|  |         xmlfilepath = os.path.join(VOCdevkit_path, 'VOC2007/Annotations') | ||||||
|  |         saveBasePath = os.path.join(VOCdevkit_path, 'VOC2007/ImageSets/Main') | ||||||
|  |         temp_xml = os.listdir(xmlfilepath) | ||||||
|  |         total_xml = [xml for xml in temp_xml if xml.endswith(".xml")] | ||||||
|  | 
 | ||||||
|  |         num = len(total_xml)  # total number of annotation files; the splits are drawn from this | ||||||
|  |         indices = range(num) | ||||||
|  |         tv = int(num * trainval_percent)  # size of train + val | ||||||
|  |         tr = int(tv * train_percent)  # size of train within train + val | ||||||
|  |         trainval = random.sample(indices, tv)  # sample train + val from all indices | ||||||
|  |         train = random.sample(trainval, tr)  # sample tr items for train from trainval | ||||||
|  | 
 | ||||||
|  |         print("train and val size", tv) | ||||||
|  |         print("train size", tr) | ||||||
|  |         ftrainval = open(os.path.join(saveBasePath, 'trainval.txt'), 'w') | ||||||
|  |         ftest = open(os.path.join(saveBasePath, 'test.txt'), 'w') | ||||||
|  |         ftrain = open(os.path.join(saveBasePath, 'train.txt'), 'w') | ||||||
|  |         fval = open(os.path.join(saveBasePath, 'val.txt'), 'w') | ||||||
|  | 
 | ||||||
|  |         for i in indices: | ||||||
|  |             name = total_xml[i][:-4] + '\n'  # file name without the .xml extension | ||||||
|  |             if i in trainval: | ||||||
|  |                 ftrainval.write(name) | ||||||
|  |                 if i in train: | ||||||
|  |                     ftrain.write(name) | ||||||
|  |                 else: | ||||||
|  |                     fval.write(name) | ||||||
|  |             else: | ||||||
|  |                 ftest.write(name) | ||||||
|  | 
 | ||||||
|  |         ftrainval.close() | ||||||
|  |         ftrain.close() | ||||||
|  |         fval.close() | ||||||
|  |         ftest.close() | ||||||
|  |         print("Generate txt in ImageSets done.") | ||||||
|  | 
 | ||||||
|  |     if annotation_mode == 0 or annotation_mode == 2: | ||||||
|  |         print("Generate 2007_train.txt and 2007_val.txt for train.") | ||||||
|  |         type_index = 0 | ||||||
|  |         for year, image_set in VOCdevkit_sets: | ||||||
|  |             image_ids = open(os.path.join(VOCdevkit_path, 'VOC%s/ImageSets/Main/%s.txt' % (year, image_set)),  # 'VOCdevkit\\VOC2007/ImageSets/Main/train.txt' | ||||||
|  |                              encoding='utf-8').read().strip().split() | ||||||
|  |             list_file = open('%s_%s.txt' % (year, image_set), 'w', encoding='utf-8')  # '2007_train.txt' | ||||||
|  |             for image_id in image_ids: | ||||||
|  |                 list_file.write(  # 'C:\\my_code\\a_python\\YOLO_all\\yolo_v3\\VOCdevkit/VOC2007/JPEGImages/000001.jpg' | ||||||
|  |                     '%s/VOC%s/JPEGImages/%s.jpg' % (os.path.abspath(VOCdevkit_path), year, image_id))  # the full image path is assembled here | ||||||
|  |                 convert_annotation(year, image_id, list_file) | ||||||
|  |                 list_file.write('\n') | ||||||
|  |             photo_nums[type_index] = len(image_ids)  # record the number of train and of val images | ||||||
|  |             type_index += 1  # marks whether the train set or the val set is being processed | ||||||
|  |             list_file.close() | ||||||
|  |         print("Generate 2007_train.txt and 2007_val.txt for train done.") | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  |         def printTable(List1, List2): | ||||||
|  |             # for i in range(len(List1[0])): | ||||||
|  |             for i, _ in enumerate(List1[0]): | ||||||
|  |                 print("|", end=' ') | ||||||
|  |                 for j in range(len(List1)):  # len(List1) is 2 | ||||||
|  |                     print(List1[j][i].rjust(int(List2[j])), end=' ') | ||||||
|  |                     print("|", end=' ') | ||||||
|  |                 print() | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  |         str_nums = [str(int(x)) for x in nums]  # count for each class | ||||||
|  |         tableData = [ | ||||||
|  |             classes, str_nums  # classes paired with their counts | ||||||
|  |         ] | ||||||
|  |         colWidths = [0] * len(tableData)  # column widths; there are len(tableData) columns, 2 here | ||||||
|  |         for i in range(len(tableData)): | ||||||
|  |             for j in range(len(tableData[i])): | ||||||
|  |                 if len(tableData[i][j]) > colWidths[i]: | ||||||
|  |                     colWidths[i] = len(tableData[i][j])  # keep the longest element length in each column | ||||||
|  |         printTable(tableData, colWidths) | ||||||
|  | 
 | ||||||
|  |         if photo_nums[0] <= 500: | ||||||
|  |             print("The training set has fewer than 500 images, which is quite small; use a larger number of epochs to get enough gradient-descent steps.") | ||||||
|  | 
 | ||||||
|  |         if np.sum(nums) == 0: | ||||||
|  |             print("No objects were found in the dataset. Make sure classes_path matches your own dataset and that the label names are correct, otherwise training will have no effect!") | ||||||
|  |             print("No objects were found in the dataset. Make sure classes_path matches your own dataset and that the label names are correct, otherwise training will have no effect!") | ||||||
|  |             print("No objects were found in the dataset. Make sure classes_path matches your own dataset and that the label names are correct, otherwise training will have no effect!") | ||||||
|  |             print("(Repeated three times because it matters.)") | ||||||
41  webcam.py  Normal file
							| @ -0,0 +1,41 @@ | |||||||
|  | import time | ||||||
|  | 
 | ||||||
|  | import cv2 | ||||||
|  | import numpy as np | ||||||
|  | from PIL import Image | ||||||
|  | 
 | ||||||
|  | from yolo import YOLO | ||||||
|  | 
 | ||||||
|  | yolo = YOLO() | ||||||
|  | 
 | ||||||
|  | capture = cv2.VideoCapture(0) | ||||||
|  | # 0 is the built-in camera, 1 is an external camera | ||||||
|  | ref, frame = capture.read() | ||||||
|  | fps = 0.0 | ||||||
|  | while True: | ||||||
|  |     t1 = time.time() | ||||||
|  |     # read one frame | ||||||
|  |     ref, frame = capture.read() | ||||||
|  |     if not ref: | ||||||
|  |         break | ||||||
|  |     # convert BGR to RGB | ||||||
|  |     frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) | ||||||
|  |     # convert to a PIL Image | ||||||
|  |     frame = Image.fromarray(np.uint8(frame)) | ||||||
|  |     # run detection | ||||||
|  |     frame = np.array(yolo.detect_image(frame)) | ||||||
|  |     # convert RGB back to BGR for OpenCV display | ||||||
|  |     frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) | ||||||
|  |     fps = (fps + (1. / (time.time() - t1))) / 2 | ||||||
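|  |     # smooth the fps readout by averaging the instantaneous value with the previous estimate | ||||||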
|  |     # print("fps= %.2f" % (fps)) | ||||||
|  |     frame = cv2.putText(frame, "fps= %.2f" % (fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) | ||||||
|  | 
 | ||||||
|  |     cv2.imshow("video", frame) | ||||||
|  |     c = cv2.waitKey(1) & 0xff | ||||||
|  |     # print(c) | ||||||
|  |     if c == ord('q'):  # quit on 'q' (keycode 113) | ||||||
|  |         capture.release() | ||||||
|  |         break | ||||||
|  | 
 | ||||||
|  | capture.release() | ||||||
|  | cv2.destroyAllWindows() | ||||||
425  yolo.py  Normal file
							| @ -0,0 +1,425 @@ | |||||||
|  | import colorsys | ||||||
|  | import os | ||||||
|  | import time | ||||||
|  | 
 | ||||||
|  | import numpy as np | ||||||
|  | import torch | ||||||
|  | import torch.nn as nn | ||||||
|  | from PIL import ImageDraw, ImageFont | ||||||
|  | 
 | ||||||
|  | from nets.yolo import YoloBody | ||||||
|  | from utils.utils import (cvtColor, get_anchors, get_classes, preprocess_input, | ||||||
|  |                          resize_image, show_config) | ||||||
|  | from utils.utils_bbox import DecodeBox | ||||||
|  | 
 | ||||||
|  | ''' | ||||||
|  | Read these comments carefully before training on your own dataset! | ||||||
|  | ''' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class YOLO(object): | ||||||
|  |     _defaults = { | ||||||
|  |         # --------------------------------------------------------------------------# | ||||||
|  |         #   To predict with your own trained model you must modify model_path and classes_path! | ||||||
|  |         #   model_path points to the weight file under logs; classes_path points to the txt under model_data. | ||||||
|  |         # | ||||||
|  |         #   After training there are several weight files under logs; pick one with a low validation loss. | ||||||
|  |         #   A low validation loss does not imply a high mAP, only good generalization on the validation set. | ||||||
|  |         #   If a shape mismatch occurs, also check the model_path and classes_path used for training. | ||||||
|  |         # --------------------------------------------------------------------------# | ||||||
|  |         # "model_path": 'model_data/yolo_weights.pth', | ||||||
|  |         # "classes_path": 'model_data/coco_classes.txt', | ||||||
|  |         "model_path": 'logs/best_epoch_weights.pth', | ||||||
|  |         "classes_path": 'model_data/cctsdb_classes.txt', | ||||||
|  |         # ---------------------------------------------------------------------# | ||||||
|  |         #   anchors_path is the txt file of the prior (anchor) boxes; usually left unchanged. | ||||||
|  |         #   anchors_mask helps the code find the matching anchors; usually left unchanged. | ||||||
|  |         # ---------------------------------------------------------------------# | ||||||
|  |         "anchors_path": 'model_data/yolo_anchors.txt', | ||||||
|  |         "anchors_mask": [[6, 7, 8], [3, 4, 5], [0, 1, 2]], | ||||||
|  |         # ---------------------------------------------------------------------# | ||||||
|  |         #   Size of the input image; must be a multiple of 32. | ||||||
|  |         # ---------------------------------------------------------------------# | ||||||
|  |         "input_shape": [416, 416], | ||||||
|  |         # ---------------------------------------------------------------------# | ||||||
|  |         #   Only prediction boxes whose score exceeds this confidence are kept. | ||||||
|  |         # ---------------------------------------------------------------------# | ||||||
|  |         "confidence": 0.5, | ||||||
|  |         # ---------------------------------------------------------------------# | ||||||
|  |         #   IoU threshold used for non-maximum suppression. | ||||||
|  |         # ---------------------------------------------------------------------# | ||||||
|  |         "nms_iou": 0.3, | ||||||
|  |         # ---------------------------------------------------------------------# | ||||||
|  |         #   Controls whether letterbox_image is used to resize the input image without distortion. | ||||||
|  |         #   Repeated tests showed that disabling letterbox_image and resizing directly works better. | ||||||
|  |         # ---------------------------------------------------------------------# | ||||||
|  |         "letterbox_image": False, | ||||||
|  |         # -------------------------------# | ||||||
|  |         #   Whether to use CUDA. | ||||||
|  |         #   Set to False if no GPU is available. | ||||||
|  |         # -------------------------------# | ||||||
|  |         "cuda": True | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     @classmethod | ||||||
|  |     def get_defaults(cls, n): | ||||||
|  |         if n in cls._defaults: | ||||||
|  |             return cls._defaults[n] | ||||||
|  |         else: | ||||||
|  |             return "Unrecognized attribute name '" + n + "'" | ||||||
|  | 
 | ||||||
|  |     # ---------------------------------------------------# | ||||||
|  |     #   Initialize YOLO | ||||||
|  |     # ---------------------------------------------------# | ||||||
|  |     def __init__(self, **kwargs): | ||||||
|  |         self.__dict__.update(self._defaults)  # update this instance's attributes from the class-level _defaults dict | ||||||
|  |         for name, value in kwargs.items(): | ||||||
|  |             setattr(self, name, value) | ||||||
|  | 
 | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         #   Get the classes and anchor priors, and their counts | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         self.class_names, self.num_classes = get_classes(self.classes_path) | ||||||
|  |         self.anchors, self.num_anchors = get_anchors(self.anchors_path) | ||||||
|  |         self.bbox_util = DecodeBox(self.anchors, self.num_classes, (self.input_shape[0], self.input_shape[1]), | ||||||
|  |                                    self.anchors_mask) | ||||||
|  | 
 | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         #   Assign a different color to each class for drawing boxes | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         hsv_tuples = [(x / self.num_classes, 1., 1.) for x in range(self.num_classes)] | ||||||
|  |         self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) | ||||||
|  |         self.colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), self.colors)) | ||||||
|  |         self.generate() | ||||||
|  | 
 | ||||||
|  |         show_config(**self._defaults)  # note: this prints the class defaults, not any kwargs overrides | ||||||
|  | 
 | ||||||
|  |     # ---------------------------------------------------# | ||||||
|  |     #   Build the model | ||||||
|  |     # ---------------------------------------------------# | ||||||
|  |     def generate(self, onnx=False): | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         #   Build the YOLOv3 model and load its weights | ||||||
|  |         # ---------------------------------------------------# | ||||||
|  |         self.net = YoloBody(self.anchors_mask, self.num_classes) | ||||||
|  |         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') | ||||||
|  |         self.net.load_state_dict(torch.load(self.model_path, map_location=device)) | ||||||
|  |         self.net = self.net.eval() | ||||||
|  |         print('{} model, anchors, and classes loaded.'.format(self.model_path)) | ||||||
|  |         # if not onnx: | ||||||
|  |         #     if self.cuda: | ||||||
|  |         #         self.net = nn.DataParallel(self.net) | ||||||
|  |         #         self.net = self.net.cuda() | ||||||
|  | 
 | ||||||
|  |         if not onnx: | ||||||
|  |             if self.cuda: | ||||||
|  |                 self.net = self.net.cuda() | ||||||
|  | 
 | ||||||
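|  |     # Editorial note (hedged): the commented-out block above wrapped the model in | ||||||
|  |     # nn.DataParallel before moving it to the GPU; for single-GPU inference the plain | ||||||
|  |     # .cuda() call used here is equivalent. After generate(), a sketch of the state: | ||||||
|  |     # | ||||||
|  |     #     yolo = YOLO()         # __init__ calls generate() | ||||||
|  |     #     yolo.net.training     # -> False, since generate() put the net in eval mode | ||||||
|  | 
 | ||||||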
|  |     # ---------------------------------------------------# | ||||||
|  |     #   Detect objects in an image | ||||||
|  |     # ---------------------------------------------------# | ||||||
|  |     def detect_image(self, image, crop=False, count=False): | ||||||
|  |         image_shape = np.array(np.shape(image)[0:2])  # np.shape(image) is (h, w, c) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Convert the image to RGB here to avoid errors when predicting on grayscale images. | ||||||
|  |         #   The code only supports prediction on RGB images; all other types are converted to RGB. | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image = cvtColor(image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Add gray bars to the image for a distortion-free resize. | ||||||
|  |         #   A direct resize can also be used for detection. | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Add the batch_size dimension | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) | ||||||
|  |         # image_data now has shape (1, 3, 416, 416) | ||||||
|  |         with torch.no_grad(): | ||||||
|  |             images = torch.from_numpy(image_data) | ||||||
|  |             if self.cuda: | ||||||
|  |                 images = images.cuda() | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             #   Feed the image into the network for prediction! | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             outputs = self.net(images) | ||||||
|  |             outputs = self.bbox_util.decode_box(outputs) | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             #   Stack the predicted boxes, then apply non-maximum suppression | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, | ||||||
|  |                                                          image_shape, self.letterbox_image, conf_thres=self.confidence, | ||||||
|  |                                                          nms_thres=self.nms_iou) | ||||||
|  | 
 | ||||||
|  |             if results[0] is None: | ||||||
|  |                 return image | ||||||
|  | 
 | ||||||
|  |             top_label = np.array(results[0][:, 6], dtype='int32') | ||||||
|  |             top_conf = results[0][:, 4] * results[0][:, 5] | ||||||
|  |             top_boxes = results[0][:, :4] | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Set the font and bounding-box line thickness | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         font = ImageFont.truetype(font='model_data/simhei.ttf', | ||||||
|  |                                   size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) | ||||||
|  |         thickness = int(max((image.size[0] + image.size[1]) // np.mean(self.input_shape), 1)) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Count detections per class | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         if count: | ||||||
|  |             print("top_label:", top_label) | ||||||
|  |             classes_nums = np.zeros([self.num_classes]) | ||||||
|  |             for i in range(self.num_classes): | ||||||
|  |                 num = np.sum(top_label == i) | ||||||
|  |                 if num > 0: | ||||||
|  |                     print(self.class_names[i], " : ", num) | ||||||
|  |                 classes_nums[i] = num | ||||||
|  |             print("classes_nums:", classes_nums) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Optionally crop out the detected objects | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         if crop: | ||||||
|  |             for i, c in list(enumerate(top_label)): | ||||||
|  |                 top, left, bottom, right = top_boxes[i] | ||||||
|  |                 top = max(0, np.floor(top).astype('int32')) | ||||||
|  |                 left = max(0, np.floor(left).astype('int32')) | ||||||
|  |                 bottom = min(image.size[1], np.floor(bottom).astype('int32')) | ||||||
|  |                 right = min(image.size[0], np.floor(right).astype('int32')) | ||||||
|  | 
 | ||||||
|  |                 dir_save_path = "img_crop" | ||||||
|  |                 if not os.path.exists(dir_save_path): | ||||||
|  |                     os.makedirs(dir_save_path) | ||||||
|  |                 crop_image = image.crop([left, top, right, bottom]) | ||||||
|  |                 crop_image.save(os.path.join(dir_save_path, "crop_" + str(i) + ".png"), quality=95, subsampling=0) | ||||||
|  |                 print("save crop_" + str(i) + ".png to " + dir_save_path) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Draw the detections on the image | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         for i, c in list(enumerate(top_label)): | ||||||
|  |             predicted_class = self.class_names[int(c)] | ||||||
|  |             box = top_boxes[i] | ||||||
|  |             score = top_conf[i] | ||||||
|  | 
 | ||||||
|  |             top, left, bottom, right = box | ||||||
|  | 
 | ||||||
|  |             top = max(0, np.floor(top).astype('int32')) | ||||||
|  |             left = max(0, np.floor(left).astype('int32')) | ||||||
|  |             bottom = min(image.size[1], np.floor(bottom).astype('int32')) | ||||||
|  |             right = min(image.size[0], np.floor(right).astype('int32')) | ||||||
|  | 
 | ||||||
|  |             label = '{} {:.2f}'.format(predicted_class, score) | ||||||
|  |             draw = ImageDraw.Draw(image) | ||||||
|  |             label_size = draw.textsize(label, font)  # note: textsize was removed in Pillow >= 10; use draw.textbbox there | ||||||
|  |             label = label.encode('utf-8') | ||||||
|  |             # print(label, top, left, bottom, right) | ||||||
|  | 
 | ||||||
|  |             if top - label_size[1] >= 0:  # enough room above the box, so place the label on top | ||||||
|  |                 text_origin = np.array([left, top - label_size[1]]) | ||||||
|  |             else:  # otherwise place the label just inside the box | ||||||
|  |                 text_origin = np.array([left, top + 1]) | ||||||
|  | 
 | ||||||
|  |             for t in range(thickness):  # draw the rectangle `thickness` times, inset one pixel per pass, to get a thick outline | ||||||
|  |                 draw.rectangle([left + t, top + t, right - t, bottom - t], outline=self.colors[c]) | ||||||
|  |             draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[c]) | ||||||
|  |             draw.text(text_origin, str(label, 'UTF-8'), fill=(0, 0, 0), font=font) | ||||||
|  |             del draw | ||||||
|  | 
 | ||||||
|  |         return image | ||||||
|  | 
 | ||||||
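|  |     # A minimal sketch of calling detect_image; 'street.jpg' is an illustrative | ||||||
|  |     # filename, not a file shipped with the repo: | ||||||
|  |     # | ||||||
|  |     #     from PIL import Image | ||||||
|  |     #     yolo = YOLO() | ||||||
|  |     #     img = Image.open("street.jpg") | ||||||
|  |     #     result = yolo.detect_image(img, crop=False, count=True) | ||||||
|  |     #     result.show() | ||||||
|  | 
 | ||||||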
|  |     def get_FPS(self, image, test_interval): | ||||||
|  |         image_shape = np.array(np.shape(image)[0:2]) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Convert the image to RGB here to avoid errors when predicting on grayscale images. | ||||||
|  |         #   The code only supports prediction on RGB images; all other types are converted to RGB. | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image = cvtColor(image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Add gray bars to the image for a distortion-free resize. | ||||||
|  |         #   A direct resize can also be used for detection. | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Add the batch_size dimension | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) | ||||||
|  | 
 | ||||||
|  |         with torch.no_grad(): | ||||||
|  |             images = torch.from_numpy(image_data) | ||||||
|  |             if self.cuda: | ||||||
|  |                 images = images.cuda() | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             #   Feed the image into the network for prediction! | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             outputs = self.net(images) | ||||||
|  |             outputs = self.bbox_util.decode_box(outputs) | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             #   Stack the predicted boxes, then apply non-maximum suppression | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, | ||||||
|  |                                                          image_shape, self.letterbox_image, conf_thres=self.confidence, | ||||||
|  |                                                          nms_thres=self.nms_iou) | ||||||
|  | 
 | ||||||
|  |         t1 = time.time() | ||||||
|  |         for _ in range(test_interval): | ||||||
|  |             with torch.no_grad(): | ||||||
|  |                 # ---------------------------------------------------------# | ||||||
|  |                 #   Feed the image into the network for prediction! | ||||||
|  |                 # ---------------------------------------------------------# | ||||||
|  |                 outputs = self.net(images) | ||||||
|  |                 outputs = self.bbox_util.decode_box(outputs) | ||||||
|  |                 # ---------------------------------------------------------# | ||||||
|  |                 #   Stack the predicted boxes, then apply non-maximum suppression | ||||||
|  |                 # ---------------------------------------------------------# | ||||||
|  |                 results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, | ||||||
|  |                                                              image_shape, self.letterbox_image, | ||||||
|  |                                                              conf_thres=self.confidence, nms_thres=self.nms_iou) | ||||||
|  | 
 | ||||||
|  |         t2 = time.time() | ||||||
|  |         tact_time = (t2 - t1) / test_interval | ||||||
|  |         return tact_time | ||||||
|  | 
 | ||||||
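|  |     # get_FPS returns the average seconds per inference (forward pass + decode + NMS) | ||||||
|  |     # over test_interval runs, so FPS is its reciprocal. A sketch, reusing the image | ||||||
|  |     # loading from the example above: | ||||||
|  |     # | ||||||
|  |     #     tact_time = yolo.get_FPS(img, test_interval=100) | ||||||
|  |     #     print('%.4f seconds, %.2f FPS' % (tact_time, 1.0 / tact_time)) | ||||||
|  | 
 | ||||||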
|  |     def detect_heatmap(self, image, heatmap_save_path): | ||||||
|  |         import cv2 | ||||||
|  |         import matplotlib.pyplot as plt | ||||||
|  |         def sigmoid(x): | ||||||
|  |             y = 1.0 / (1.0 + np.exp(-x)) | ||||||
|  |             return y | ||||||
|  | 
 | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Convert the image to RGB here to avoid errors when predicting on grayscale images. | ||||||
|  |         #   The code only supports prediction on RGB images; all other types are converted to RGB. | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image = cvtColor(image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Add gray bars to the image for a distortion-free resize. | ||||||
|  |         #   A direct resize can also be used for detection. | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Add the batch_size dimension | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) | ||||||
|  | 
 | ||||||
|  |         with torch.no_grad(): | ||||||
|  |             images = torch.from_numpy(image_data) | ||||||
|  |             if self.cuda: | ||||||
|  |                 images = images.cuda() | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             #   Feed the image into the network for prediction! | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             outputs = self.net(images) | ||||||
|  | 
 | ||||||
|  |         plt.imshow(image, alpha=1) | ||||||
|  |         plt.axis('off') | ||||||
|  |         mask = np.zeros((image.size[1], image.size[0])) | ||||||
|  |         for sub_output in outputs: | ||||||
|  |             sub_output = sub_output.cpu().numpy() | ||||||
|  |             b, c, h, w = np.shape(sub_output) | ||||||
|  |             sub_output = np.transpose(np.reshape(sub_output, [b, 3, -1, h, w]), [0, 3, 4, 1, 2])[0] | ||||||
|  |             score = np.max(sigmoid(sub_output[..., 4]), -1) | ||||||
|  |             score = cv2.resize(score, (image.size[0], image.size[1])) | ||||||
|  |             normed_score = (score * 255).astype('uint8') | ||||||
|  |             mask = np.maximum(mask, normed_score) | ||||||
|  | 
 | ||||||
|  |         plt.imshow(mask, alpha=0.5, interpolation='nearest', cmap="jet") | ||||||
|  | 
 | ||||||
|  |         plt.axis('off') | ||||||
|  |         plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0) | ||||||
|  |         plt.margins(0, 0) | ||||||
|  |         plt.savefig(heatmap_save_path, dpi=200, bbox_inches='tight', pad_inches=-0.1) | ||||||
|  |         print("Save to the " + heatmap_save_path) | ||||||
|  |         plt.show() | ||||||
|  | 
 | ||||||
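|  |     # detect_heatmap overlays, for each detection head, the per-pixel maximum of the | ||||||
|  |     # sigmoid-activated objectness channel across the three anchors, then saves the | ||||||
|  |     # overlay. A sketch (the output path is illustrative): | ||||||
|  |     # | ||||||
|  |     #     yolo.detect_heatmap(img, "heatmap_vision.png") | ||||||
|  | 
 | ||||||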
|  |     def convert_to_onnx(self, simplify, model_path): | ||||||
|  |         import onnx | ||||||
|  |         self.generate(onnx=True) | ||||||
|  | 
 | ||||||
|  |         im = torch.zeros(1, 3, *self.input_shape).to('cpu')  # dummy input, BCHW = (1, 3, 416, 416) for the default input_shape | ||||||
|  |         input_layer_names = ["images"] | ||||||
|  |         output_layer_names = ["output"] | ||||||
|  | 
 | ||||||
|  |         # Export the model | ||||||
|  |         print(f'Starting export with onnx {onnx.__version__}.') | ||||||
|  |         torch.onnx.export(self.net, | ||||||
|  |                           im, | ||||||
|  |                           f=model_path, | ||||||
|  |                           verbose=False, | ||||||
|  |                           opset_version=12, | ||||||
|  |                           training=torch.onnx.TrainingMode.EVAL, | ||||||
|  |                           do_constant_folding=True, | ||||||
|  |                           input_names=input_layer_names, | ||||||
|  |                           output_names=output_layer_names, | ||||||
|  |                           dynamic_axes=None) | ||||||
|  | 
 | ||||||
|  |         # Checks | ||||||
|  |         model_onnx = onnx.load(model_path)  # load onnx model | ||||||
|  |         onnx.checker.check_model(model_onnx)  # check onnx model | ||||||
|  | 
 | ||||||
|  |         # Simplify onnx | ||||||
|  |         if simplify: | ||||||
|  |             import onnxsim | ||||||
|  |             print(f'Simplifying with onnx-simplifier {onnxsim.__version__}.') | ||||||
|  |             model_onnx, check = onnxsim.simplify( | ||||||
|  |                 model_onnx, | ||||||
|  |                 dynamic_input_shape=False, | ||||||
|  |                 input_shapes=None) | ||||||
|  |             assert check, 'onnx-simplifier check failed' | ||||||
|  |             onnx.save(model_onnx, model_path) | ||||||
|  | 
 | ||||||
|  |         print('ONNX model saved as {}'.format(model_path)) | ||||||
|  | 
 | ||||||
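|  |     # A sketch of the ONNX export (the output path is illustrative; onnxsim must be | ||||||
|  |     # installed when simplify=True): | ||||||
|  |     # | ||||||
|  |     #     yolo.convert_to_onnx(simplify=True, model_path="model_data/models.onnx") | ||||||
|  | 
 | ||||||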
|  |     def get_map_txt(self, image_id, image, class_names, map_out_path): | ||||||
|  |         f = open(os.path.join(map_out_path, "detection-results/" + image_id + ".txt"), "w") | ||||||
|  |         image_shape = np.array(np.shape(image)[0:2]) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Convert the image to RGB here to avoid errors when predicting on grayscale images. | ||||||
|  |         #   The code only supports prediction on RGB images; all other types are converted to RGB. | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image = cvtColor(image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Add gray bars to the image for a distortion-free resize. | ||||||
|  |         #   A direct resize can also be used for detection. | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = resize_image(image, (self.input_shape[1], self.input_shape[0]), self.letterbox_image) | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         #   Add the batch_size dimension | ||||||
|  |         # ---------------------------------------------------------# | ||||||
|  |         image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, dtype='float32')), (2, 0, 1)), 0) | ||||||
|  | 
 | ||||||
|  |         with torch.no_grad(): | ||||||
|  |             images = torch.from_numpy(image_data) | ||||||
|  |             if self.cuda: | ||||||
|  |                 images = images.cuda() | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             #   Feed the image into the network for prediction! | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             outputs = self.net(images) | ||||||
|  |             outputs = self.bbox_util.decode_box(outputs) | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             #   Stack the predicted boxes, then apply non-maximum suppression | ||||||
|  |             # ---------------------------------------------------------# | ||||||
|  |             results = self.bbox_util.non_max_suppression(torch.cat(outputs, 1), self.num_classes, self.input_shape, | ||||||
|  |                                                          image_shape, self.letterbox_image, conf_thres=self.confidence, | ||||||
|  |                                                          nms_thres=self.nms_iou) | ||||||
|  | 
 | ||||||
|  |             if results[0] is None: | ||||||
|  |                 return | ||||||
|  | 
 | ||||||
|  |             top_label = np.array(results[0][:, 6], dtype='int32') | ||||||
|  |             top_conf = results[0][:, 4] * results[0][:, 5] | ||||||
|  |             top_boxes = results[0][:, :4] | ||||||
|  | 
 | ||||||
|  |         for i, c in list(enumerate(top_label)): | ||||||
|  |             predicted_class = self.class_names[int(c)] | ||||||
|  |             box = top_boxes[i] | ||||||
|  |             score = str(top_conf[i]) | ||||||
|  | 
 | ||||||
|  |             top, left, bottom, right = box | ||||||
|  |             if predicted_class not in class_names: | ||||||
|  |                 continue | ||||||
|  | 
 | ||||||
|  |             f.write("%s %s %s %s %s %s\n" % ( | ||||||
|  |                 predicted_class, score[:6], str(int(left)), str(int(top)), str(int(right)), str(int(bottom)))) | ||||||
|  | 
 | ||||||
|  |         f.close() | ||||||
|  |         return | ||||||
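|  | 
 | ||||||
|  |     # get_map_txt writes one "class score left top right bottom" line per detection | ||||||
|  |     # to map_out_path/detection-results/<image_id>.txt, the layout expected by the | ||||||
|  |     # repo's mAP tooling. A sketch (paths and image_id are illustrative): | ||||||
|  |     # | ||||||
|  |     #     os.makedirs("map_out/detection-results", exist_ok=True) | ||||||
|  |     #     yolo.get_map_txt("000001", img, yolo.class_names, "map_out") | ||||||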