• 企业400电话
  • 微网小程序
  • AI电话机器人
  • 电商代运营
  • 全 部 栏 目

    企业400电话 网络优化推广 AI电话机器人 呼叫中心 网站建设 商标✡知产 微网小程序 电商运营 彩铃•短信 增值拓展业务
    Python 统计数据集标签的类别及数目操作

    看了大神统计 VOC 数据集标签框的代码后,针对自己标注的数据集做了灵活应用,感谢!

    看代码吧~

    import re
    import os
    import xml.etree.ElementTree as ET
    # Label names to count. Each must match the <name> text of an <object>
    # element in the annotation XML files exactly.
    class1 = 'answer'
    class2 = 'hand'
    class3 = 'write'
    class4 = 'music'
    class5 = 'phone'
    # Class slots left over from the original VOC script, disabled for this
    # custom data set (kept as real comments instead of a bare string literal):
    # class6 = 'bus'
    # class7 = 'car'
    # class8 = 'cat'
    # class9 = 'chair'
    # class10 = 'cow'
    # class11 = 'diningtable'
    # class12 = 'dog'
    # class13 = 'horse'
    # class14 = 'motorbike'
    # class15 = 'person'
    # class16 = 'pottedplant'
    # class17 = 'sheep'
    # class18 = 'sofa'
    # class19 = 'train'
    # class20 = 'tvmonitor'

    # Change this to the path of your own annotation folder.
    annotation_folder = '/home/.../train/'
    # annotation_folder = '/home/.../VOC2007/Annotations/'

    # Renamed from `list` so the builtin is no longer shadowed. The listing is
    # only referenced by commented-out code in this script; the call is kept
    # because os.listdir raises immediately when the folder does not exist.
    annotation_files = os.listdir(annotation_folder)
      
    def file_name(file_dir):
        """Collect the paths of all ``.xml`` files below *file_dir*.

        The directory tree is walked recursively with ``os.walk``. A file is
        kept when its extension is exactly ``.xml`` (case-sensitive).

        Returns a list of paths, each built by joining the containing
        directory and the file name, in the order ``os.walk`` yields them.
        """
        return [
            os.path.join(parent, entry)
            for parent, _subdirs, entries in os.walk(file_dir)
            for entry in entries
            if os.path.splitext(entry)[1] == '.xml'
        ]
      
    # Classes to report, in output order. To track more labels, define them
    # next to class1..class5 and add them here; no other change is needed.
    tracked_classes = [class1, class2, class3, class4, class5]

    # class name -> number of bounding boxes with that label
    box_counts = {name: 0 for name in tracked_classes}
    # class name -> number of annotation files containing at least one such box
    pic_counts = {name: 0 for name in tracked_classes}
    total = 0        # bounding boxes belonging to any tracked class
    total_pic = 0    # annotation files processed

    xml_dirs = file_name(annotation_folder)
    for xml_path in xml_dirs:
        print(xml_path)

        # ET.parse opens and closes the file itself and decodes it according
        # to the XML declaration, instead of leaking a handle from a bare
        # open().read() that decodes with the locale's default encoding.
        root = ET.parse(xml_path).getroot()

        total_pic += 1
        seen_in_file = set()  # tracked classes present in this file
        for obj in root.findall('object'):
            # findtext returns None when <name> is missing, so a malformed
            # object is skipped instead of raising AttributeError.
            label = obj.findtext('name')
            if label in box_counts:
                box_counts[label] += 1
                total += 1
                seen_in_file.add(label)

        # Each file counts at most once per class, however many boxes it has.
        for label in seen_in_file:
            pic_counts[label] += 1

    # One line per class: name, number of files, number of boxes.
    for name in tracked_classes:
        print(name, pic_counts[name], box_counts[name])

    print("total", total_pic, total)
     

    补充:【数据集处理】Python 对目标检测数据集 xml 文件的操作(统计目标种类、数量、面积、比例等,以及修改目标名字)

    1. 根据xml文件统计目标种类以及数量

    # -*- coding:utf-8 -*-
    #根据xml文件统计目标种类以及数量
    import os
    import xml.etree.ElementTree as ET
    import numpy as np
    # threshold=np.nan is rejected by current NumPy releases (ValueError); use a
    # large finite threshold so arrays are still printed without truncation.
    np.set_printoptions(suppress=True, threshold=10000000)
    import matplotlib
    from PIL import Image
     
    def parse_obj(xml_path, filename):
        """Read the object names from one annotation file.

        The file path is the plain concatenation ``xml_path + filename``, so
        *xml_path* must already end with a path separator.

        Returns a list with one ``{'name': <text of the name element>}`` dict
        per ``object`` element, in document order.
        """
        annotation = ET.parse(xml_path + filename)
        return [
            {'name': node.find('name').text}
            for node in annotation.findall('object')
        ]
      
    def read_image(image_path, filename):
        """Return ``[width, height, area]`` for one image.

        The file path is the plain concatenation ``image_path + filename``.
        ``area`` is ``width * height``.
        """
        # Image.open leaves the underlying file open; the context manager
        # releases it as soon as the size has been read, so scanning a large
        # data set does not accumulate open file handles.
        with Image.open(image_path + filename) as im:
            W, H = im.size
        return [W, H, W * H]
      
    if __name__ == '__main__':
        xml_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations/'

        # Keep only annotation files. Previously every directory entry had
        # '.xml' stripped and re-appended, so a stray non-XML file made the
        # script try to parse a file that does not exist.
        xml_files = [entry for entry in os.listdir(xml_path)
                     if entry.endswith('.xml')]

        # class name -> number of annotated objects. Dict insertion order
        # keeps the classes in order of first appearance for the report.
        num_objs = {}
        for xml_file in xml_files:
            for obj in parse_obj(xml_path, xml_file):
                label = obj['name']
                num_objs[label] = num_objs.get(label, 0) + 1

        for label, count in num_objs.items():
            print('{}:{}个'.format(label, count))
        print('信息统计算完毕。')
    

    2. 根据xml文件统计目标的平均长度、宽度、面积以及每一个目标在原图中的占比

    # -*- coding:utf-8 -*-
    #统计
    # 计算每一个目标在原图中的占比
    # 计算目标的平均长度、
    # 计算平均宽度,
    # 计算平均面积、
    # 计算目标平均占比
    import os
    import xml.etree.ElementTree as ET
    import numpy as np
    #np.set_printoptions(suppress=True, threshold=np.nan)  #10,000,000
    np.set_printoptions(suppress=True, threshold=10000000)  #10,000,000
    import matplotlib
    from PIL import Image
    def parse_obj(xml_path, filename):
        """Read object names and bounding boxes from one annotation file.

        The file path is the plain concatenation ``xml_path + filename``.

        Returns a list with one dict per ``object`` element, in document
        order. Each dict has ``'name'`` (text of the ``name`` element) and
        ``'bbox'`` (``[xmin, ymin, xmax, ymax]`` converted with ``int``).
        """
        corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
        annotation = ET.parse(xml_path + filename)
        parsed = []
        for node in annotation.findall('object'):
            label = node.find('name').text
            box = node.find('bndbox')
            parsed.append({
                'name': label,
                'bbox': [int(box.find(tag).text) for tag in corner_tags],
            })
        return parsed
    def read_image(image_path, filename):
        """Return ``[width, height, area]`` for one image.

        The file path is the plain concatenation ``image_path + filename``.
        ``area`` is ``width * height``.
        """
        # Image.open leaves the underlying file open; the context manager
        # releases it as soon as the size has been read, so scanning a large
        # data set does not accumulate open file handles.
        with Image.open(image_path + filename) as im:
            W, H = im.size
        return [W, H, W * H]
    if __name__ == '__main__':
        image_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/JPEGImages/'
        xml_path = '/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations/'

        # Base names of the annotation files only. Non-XML directory entries
        # are skipped instead of being turned into non-existent '<name>.xml'.
        filenames = [os.path.splitext(entry)[0]
                     for entry in os.listdir(xml_path)
                     if entry.endswith('.xml')]
        print(filenames)

        recs = {}       # base name -> objects parsed from the annotation
        ims_info = {}   # base name -> [width, height, area] of the image
        for name in filenames:
            print('正在处理 {}.xml '.format(name))
            recs[name] = parse_obj(xml_path, name + '.xml')
            print('正在处理 {}.jpg '.format(name))
            ims_info[name] = read_image(image_path, name + '.jpg')
        print('所有信息收集完毕。')
        print('正在处理信息......')

        # class name -> list of [w, h, area, w_rate, h_rate, area_rate] rows,
        # one row per annotated object. Registering the class here, inside the
        # object loop, fixes the earlier version that only registered the last
        # object of each file (and crashed when the first file had no objects).
        obs_shape = {}
        for name in filenames:
            im_w, im_h, im_area = ims_info[name]
            for obj in recs[name]:
                xmin, ymin, xmax, ymax = obj['bbox']
                ob_w = xmax - xmin
                ob_h = ymax - ymin
                ob_area = ob_w * ob_h
                obs_shape.setdefault(obj['name'], []).append([
                    ob_w,
                    ob_h,
                    ob_area,
                    ob_w / im_w,          # width relative to the image
                    ob_h / im_h,          # height relative to the image
                    ob_area / im_area,    # area relative to the image
                ])

        # Per-class column means: column sums divided by the object count.
        obj_avg = {}
        for name, rows in obs_shape.items():
            obj_avg[name] = np.array(rows).sum(axis=0) / len(rows)
            print('{}的情况如下:*******\n'.format(name))
            print('  目标平均W={}'.format(obj_avg[name][0]))
            print('  目标平均H={}'.format(obj_avg[name][1]))
            print('  目标平均area={}'.format(obj_avg[name][2]))
            print('  目标平均与原图的W比例={}'.format(obj_avg[name][3]))
            print('  目标平均与原图的H比例={}'.format(obj_avg[name][4]))
            print('  目标平均原图面积占比={}\n'.format(obj_avg[name][5]))
        print('信息统计计算完毕。')
    

    3. 修改xml文件中某个目标的名字为另一个名字

    #修改xml文件中的目标的名字,
    import os, sys
    import glob
    from xml.etree import ElementTree as ET
    # 批量读取Annotations下的xml文件
    # per=ET.parse(r'C:\Users\rockhuang\Desktop\Annotations\000003.xml')
    xml_dir = r'/home/dlut/网络/make_database/数据集——合集/VOCdevkit/VOC2018/Annotations'

    # Old label -> new label. Edit this mapping to choose what gets renamed.
    renames = {
        'PinNormal': 'normal bolt',
        'PinDefect': 'defect bolt-1',
    }

    xml_list = glob.glob(xml_dir + '/*.xml')
    for xml in xml_list:
        print(xml)
        per = ET.parse(xml)
        child = None  # last <name> element seen in this file, if any
        # Relative path: an absolute '/object' path on a tree is deprecated.
        for oneper in per.findall('object'):
            # Look the <name> element up by tag. Element.getchildren() was
            # removed in Python 3.9, and taking child [0] silently assumed
            # that <name> is always the first child of <object>.
            name_node = oneper.find('name')
            if name_node is None:
                continue
            child = name_node
            child.text = renames.get(child.text, child.text)
        per.write(xml)
        # Report the last label of this file. The guard avoids the NameError
        # (or a stale value from a previous file) when a file has no objects.
        if child is not None:
            print(child.tag, ':', child.text)
    

    修改为:

    以上为个人经验,希望能给大家一个参考,也希望大家多多支持脚本之家。

    您可能感兴趣的文章:
    • Python统计可散列的对象之容器Counter详解
    • Python 统计列表中重复元素的个数并返回其索引值的实现方法
    • Python实战之单词打卡统计
    • python之cur.fetchall与cur.fetchone提取数据并统计处理操作
    • python自动统计zabbix系统监控覆盖率的示例代码
    • python 统计代码耗时的几种方法分享
    • Python统计列表元素出现次数的方法示例
    • python统计RGB图片某像素的个数案例
    • Python jieba 中文分词与词频统计的操作
    • 利用Python3实现统计大量单词中各字母出现的次数和频率的方法
    • 使用Python 统计文件夹内所有pdf页数的小工具
    • python 统计list中各个元素出现的次数的几种方法
    • python调用百度AI接口实现人流量统计
    • Python代码覆盖率统计工具coverage.py用法详解
    • python 爬虫基本使用——统计杭电oj题目正确率并排序
    • 利用python汇总统计多张Excel
    • python统计mysql数据量变化并调用接口告警的示例代码
    • 用python实现监控视频人数统计
    上一篇:python数据类型相关知识扩展
    下一篇:Python手拉手教你爬取贝壳房源数据的实战教程
  • 相关文章
  • 

    © 2016-2020 巨人网络通讯 版权所有

    《增值电信业务经营许可证》 苏ICP备15040257号-8

    Python 统计数据集标签的类别及数目操作 Python,统计数据,集,标签,