看了大神统计voc数据集标签框后,针对自己标注数据集,灵活应用,感谢!
看代码吧~
import re
import os
import xml.etree.ElementTree as ET
# Labels whose bounding boxes are counted.  Each value must match the text of
# the <name> element in the annotation XML exactly.
class1 = 'answer'
class2 = 'hand'
class3 = 'write'
class4 = 'music'
class5 = 'phone'

# Further class names left over from the script this one was adapted from
# (kept for reference only; they are not counted):
# class6 = 'bus'
# class7 = 'car'
# class8 = 'cat'
# class9 = 'chair'
# class10 = 'cow'
# class11 = 'diningtable'
# class12 = 'dog'
# class13 = 'horse'
# class14 = 'motorbike'
# class15 = 'person'
# class16 = 'pottedplant'
# class17 = 'sheep'
# class18 = 'sofa'
# class19 = 'train'
# class20 = 'tvmonitor'

# Folder holding the annotation XML files; change it to your own label folder.
annotation_folder = '/home/.../train/'
# annotation_folder = '/home/.../VOC2007/Annotations/'

# The former `list = os.listdir(annotation_folder)` was dropped on purpose: the
# result was never used, the name shadowed the builtin `list`, and the call
# aborted the script before any work started when the folder was missing.
def file_name(file_dir):
    """Collect the paths of all ``.xml`` files below *file_dir*.

    The directory tree is walked recursively with ``os.walk``.

    Args:
        file_dir: Root directory to search.

    Returns:
        A sorted list of paths (``file_dir`` joined with the relative
        location) of every file whose extension is exactly ``.xml``.
        The list is empty when nothing matches or the directory does not
        exist.
    """
    xml_paths = []
    for root, _dirs, files in os.walk(file_dir):
        for entry in files:
            if os.path.splitext(entry)[1] == '.xml':
                xml_paths.append(os.path.join(root, entry))
    # os.walk yields entries in arbitrary order; sorting keeps runs repeatable.
    return sorted(xml_paths)
# Classes to tally, in report order.  To track more classes, define them next
# to class1..class5 and append them to this list; nothing else has to change.
class_names = [class1, class2, class3, class4, class5]

box_counts = {name: 0 for name in class_names}  # bounding boxes per class
pic_counts = {name: 0 for name in class_names}  # files containing the class
total = 0       # bounding boxes that belong to any tracked class
total_pic = 0   # annotation files processed

xml_dirs = file_name(annotation_folder)
for xml_path in xml_dirs:
    print(xml_path)
    # ET.parse reads the file in binary mode, so the encoding declared in the
    # XML is honoured, and it closes the file handle when it is done.
    root = ET.parse(xml_path).getroot()
    total_pic += 1

    labels_in_file = set()  # tracked classes seen at least once in this file
    for obj in root.findall('object'):
        # findtext returns None for an <object> without <name>; such objects
        # are skipped like any other untracked label.
        label = obj.findtext('name')
        if label in box_counts:
            box_counts[label] += 1
            total += 1
            labels_in_file.add(label)

    # A file counts once per class, however many boxes of that class it has.
    for label in labels_in_file:
        pic_counts[label] += 1

# One line per class: name, number of files containing it, number of boxes.
for name in class_names:
    print(name, pic_counts[name], box_counts[name])
print("total", total_pic, total)
补充:【数据集处理】Python对目标检测数据集xml文件操作(统计目标种类、数量、面积、比例等修改目标名字)
1. 根据xml文件统计目标种类以及数量
# -*- coding:utf-8 -*-
# Count the object classes and the number of objects per class from the XML files
import os
import xml.etree.ElementTree as ET
import numpy as np
# Print arrays in full and without scientific notation.  The threshold has to
# be a finite number (current NumPy rejects NaN with a ValueError).
np.set_printoptions(suppress=True, threshold=10000000)
import matplotlib
from PIL import Image
def parse_obj(xml_path, filename):
    """Read the object labels from one annotation file.

    Args:
        xml_path: Directory that contains the annotation file.  A trailing
            path separator is optional.
        filename: File name of the annotation, including the extension.

    Returns:
        A list with one ``{'name': <label>}`` dict per ``<object>`` element
        found directly under the document root, in document order.
    """
    # os.path.join works with or without a trailing separator on xml_path.
    tree = ET.parse(os.path.join(xml_path, filename))
    return [{'name': obj.find('name').text} for obj in tree.findall('object')]
def read_image(image_path, filename):
    """Return the dimensions of one image.

    Args:
        image_path: Directory that contains the image.  A trailing path
            separator is optional.
        filename: File name of the image, including the extension.

    Returns:
        ``[W, H, area]`` where ``W`` and ``H`` are the two entries of the
        image's ``size`` and ``area`` is their product.
    """
    # Image.open keeps the underlying file open; the context manager makes
    # sure it is released once the size has been read.
    with Image.open(os.path.join(image_path, filename)) as im:
        W, H = im.size
    return [W, H, W * H]
if __name__ == '__main__':
    # Directory holding the annotation XML files.
    xml_path = '/home/dlut/網(wǎng)絡(luò)/make_database/數(shù)據(jù)集——合集/VOCdevkit/VOC2018/Annotations/'

    # Base names (without extension) of the annotation files.  Entries that
    # are not *.xml are skipped instead of being parsed as if they were.
    filenames = []
    for entry in os.listdir(xml_path):
        stem, ext = os.path.splitext(entry)
        if ext == '.xml':
            filenames.append(stem)

    # Parsed objects per annotation file.
    recs = {stem: parse_obj(xml_path, stem + '.xml') for stem in filenames}

    # Number of objects per class; keys appear in first-seen order.
    num_objs = {}
    for stem in filenames:
        for obj in recs[stem]:
            label = obj['name']
            num_objs[label] = num_objs.get(label, 0) + 1

    # NOTE(review): the output literals below look garbled by text extraction;
    # they are runtime strings and are therefore left untouched.
    for label, count in num_objs.items():
        print('{}:{}個(gè)'.format(label, count))
    print('信息統(tǒng)計(jì)算完畢。')
2.根据xml文件统计目标的平均长度、宽度、面积以及每一个目标在原图中的占比
# -*- coding:utf-8 -*-
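# Statistics computed by this script:
# - the share of the original image taken up by each object
# - the average object length
# - the average object width
# - the average object area
# - the average share of the image taken up by an object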
import os
import xml.etree.ElementTree as ET
import numpy as np
#np.set_printoptions(suppress=True, threshold=np.nan) #10,000,000
np.set_printoptions(suppress=True, threshold=10000000) #10,000,000
import matplotlib
from PIL import Image
def parse_obj(xml_path, filename):
    """Read labels and bounding boxes from one annotation file.

    Args:
        xml_path: Directory that contains the annotation file.  A trailing
            path separator is optional.
        filename: File name of the annotation, including the extension.

    Returns:
        A list with one dict per ``<object>`` element found directly under
        the document root, in document order.  Each dict has the keys
        ``'name'`` (text of ``<name>``) and ``'bbox'`` (``[xmin, ymin,
        xmax, ymax]`` as ints, read from ``<bndbox>``).
    """
    # os.path.join works with or without a trailing separator on xml_path.
    tree = ET.parse(os.path.join(xml_path, filename))
    objects = []
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        objects.append({
            'name': obj.find('name').text,
            # Going through float() also accepts coordinates written as
            # "48.0"; the fractional part is truncated.
            'bbox': [int(float(bbox.find(tag).text))
                     for tag in ('xmin', 'ymin', 'xmax', 'ymax')],
        })
    return objects
def read_image(image_path, filename):
    """Return the dimensions of one image.

    Args:
        image_path: Directory that contains the image.  A trailing path
            separator is optional.
        filename: File name of the image, including the extension.

    Returns:
        ``[W, H, area]`` where ``W`` and ``H`` are the two entries of the
        image's ``size`` and ``area`` is their product.
    """
    # Image.open keeps the underlying file open; the context manager makes
    # sure it is released once the size has been read.
    with Image.open(os.path.join(image_path, filename)) as im:
        W, H = im.size
    return [W, H, W * H]
if __name__ == '__main__':
    # Directories holding the images and their annotation XML files.
    image_path = '/home/dlut/網(wǎng)絡(luò)/make_database/數(shù)據(jù)集——合集/VOCdevkit/VOC2018/JPEGImages/'
    xml_path = '/home/dlut/網(wǎng)絡(luò)/make_database/數(shù)據(jù)集——合集/VOCdevkit/VOC2018/Annotations/'

    # Base names (without extension) of the annotation files.  Entries that
    # are not *.xml are skipped instead of being parsed as if they were.
    filenames = []
    for entry in os.listdir(xml_path):
        stem, ext = os.path.splitext(entry)
        if ext == '.xml':
            filenames.append(stem)
    print(filenames)

    # NOTE(review): several output literals below look garbled by text
    # extraction; they are runtime strings and are therefore left untouched.
    recs = {}      # parsed objects per annotation
    ims_info = {}  # [W, H, area] per image
    for name in filenames:
        print('正在處理 {}.xml '.format(name))
        recs[name] = parse_obj(xml_path, name + '.xml')
        # Each annotation is expected to have a same-named .jpg image.
        print('正在處理 {}.jpg '.format(name))
        ims_info[name] = read_image(image_path, name + '.jpg')
    print('所有信息收集完畢。')
    print('正在處理信息......')

    # Per class: one row [w, h, area, w/img_w, h/img_h, area/img_area] for
    # every object.  Keys appear in first-seen order.
    obs_shape = {}
    for name in filenames:
        im_w, im_h, im_area = ims_info[name]
        for obj in recs[name]:
            xmin, ymin, xmax, ymax = obj['bbox']
            ob_w = xmax - xmin
            ob_h = ymax - ymin
            ob_area = ob_w * ob_h
            obs_shape.setdefault(obj['name'], []).append([
                ob_w,
                ob_h,
                ob_area,
                ob_w / im_w,
                ob_h / im_h,
                ob_area / im_area,
            ])

    # Column-wise average over all objects of a class.
    obj_avg = {}
    for label, rows in obs_shape.items():
        obj_avg[label] = np.array(rows).sum(axis=0) / len(rows)
        print('{}的情況如下:*******\n'.format(label))
        print(' 目標(biāo)平均W={}'.format(obj_avg[label][0]))
        print(' 目標(biāo)平均H={}'.format(obj_avg[label][1]))
        print(' 目標(biāo)平均area={}'.format(obj_avg[label][2]))
        print(' 目標(biāo)平均與原圖的W比例={}'.format(obj_avg[label][3]))
        print(' 目標(biāo)平均與原圖的H比例={}'.format(obj_avg[label][4]))
        print(' 目標(biāo)平均原圖面積占比={}\n'.format(obj_avg[label][5]))
    print('信息統(tǒng)計(jì)計(jì)算完畢。')
3.修改xml文件中某个目标的名字为另一个名字
# Rename objects in the XML annotation files.
import os, sys
import glob
from xml.etree import ElementTree as ET
# Batch-read the XML files under Annotations
# per=ET.parse(r'C:\Users\rockhuang\Desktop\Annotations\000003.xml')
# Old label -> new label.  Edit this mapping to choose what gets renamed.
RENAMES = {
    'PinNormal': 'normal bolt',
    'PinDefect': 'defect bolt-1',
}


def rename_objects(tree, renames):
    """Rename object labels of a parsed annotation in place.

    Args:
        tree: ``ElementTree`` of one annotation file.
        renames: Mapping from the current ``<name>`` text to its replacement.
            Labels that are not keys of the mapping are left unchanged.

    Returns:
        The ``<name>`` elements of all ``<object>`` elements directly under
        the root, in document order.  Objects without a ``<name>`` child are
        skipped.
    """
    name_elems = []
    for obj in tree.findall('object'):
        # Look the child up by tag instead of relying on it being first
        # (Element.getchildren() no longer exists in current Python).
        name_elem = obj.find('name')
        if name_elem is None:
            continue
        name_elem.text = renames.get(name_elem.text, name_elem.text)
        name_elems.append(name_elem)
    return name_elems


xml_dir = r'/home/dlut/網(wǎng)絡(luò)/make_database/數(shù)據(jù)集——合集/VOCdevkit/VOC2018/Annotations'
xml_list = glob.glob(xml_dir + '/*.xml')
for xml_file in xml_list:
    print(xml_file)
    per = ET.parse(xml_file)
    renamed = rename_objects(per, RENAMES)
    # The file is rewritten in place.
    per.write(xml_file)
    # Report the last object's label; files without objects print nothing.
    if renamed:
        print(renamed[-1].tag, ':', renamed[-1].text)
修改为:
以上为个人经验,希望能给大家一个参考,也希望大家多多支持脚本之家。
您可能感兴趣的文章:- Python统计可散列的对象之容器Counter详解
- Python 统计列表中重复元素的个数并返回其索引值的实现方法
- Python实战之单词打卡统计
- python之cur.fetchall与cur.fetchone提取数据并统计处理操作
- python自动统计zabbix系统监控覆盖率的示例代码
- python 统计代码耗时的几种方法分享
- Python统计列表元素出现次数的方法示例
- python统计RGB图片某像素的个数案例
- Python jieba 中文分词与词频统计的操作
- 利用Python3实现统计大量单词中各字母出现的次数和频率的方法
- 使用Python 统计文件夹内所有pdf页数的小工具
- python 统计list中各个元素出现的次数的几种方法
- python调用百度AI接口实现人流量统计
- Python代码覆盖率统计工具coverage.py用法详解
- python 爬虫基本使用——统计杭电oj题目正确率并排序
- 利用python汇总统计多张Excel
- python统计mysql数据量变化并调用接口告警的示例代码
- 用python实现监控视频人数统计