본문 바로가기
반치용/문제해결(trouble shooting)

[파이썬]dicom 파일 헤더 비식별화 multi processing

by Cat.8 2020. 5. 28.

 

준비 -패키지 설치

pip install pydicom
pip install tqdm
(multiprocessing은 파이썬 표준 라이브러리라서 별도로 pip 설치할 필요 없음)

준비 - 파일 생성

두 파일 다 dcm 파일이 있는 폴더 혹은 상위 폴더에 생성할 것.
(하위 디렉토리 전체를 비식별화해서 원본 파일을 덮어쓰므로, 원본은 따로 백업해둘 것)

multi_body.py (.py로 생성할것)

# coding: utf-8

import os
import pydicom
from tqdm import tqdm
import time

# get dcm_file_list
def get_file_list():
    """Collect the absolute paths of all ``.dcm`` files below the working directory.

    The current working directory is walked recursively and every file whose
    name ends in ``.dcm`` (case-sensitive) is kept.

    Returns:
        A list of normalized absolute path strings. If an ``OSError`` is raised
        while scanning, the string ``'get_file_list error.'`` is returned
        instead (legacy error contract kept for existing callers).
    """
    try:
        base_dir = os.getcwd()
        dcm_paths = []
        # Walk '.' rather than the Windows-only '.\\' so the scan also works on
        # POSIX systems, where '.\\' names a non-existent directory and the walk
        # silently yields nothing.
        for folder, _, names in os.walk('.'):
            for name in names:
                if name.endswith('.dcm'):
                    # normpath() drops the './' segments; this replaces the old
                    # string-level replace('.\\', ''), which could also mangle
                    # any folder whose name ends in a dot.
                    dcm_paths.append(
                        os.path.normpath(os.path.join(base_dir, folder, name))
                    )
        return dcm_paths
    except OSError:
        return 'get_file_list error.'


# de-identifier for multi
def de_identifier_for_multi(filename):
    """Overwrite identifying header elements of one DICOM file in place.

    The file is read with pydicom, a fixed set of patient / physician /
    institution elements is replaced, and the dataset is saved back to the
    same path.

    Args:
        filename: Path of the DICOM file; it is converted with ``str()``
            before being passed to pydicom.

    Returns:
        ``None`` on success, ``'de_identifier // file reading error. '`` if the
        file cannot be read, or ``'de_identifier error'`` if replacing the
        elements or saving fails.
    """
    try:
        dataset = pydicom.filereader.dcmread(str(filename))
    except Exception:
        return 'de_identifier // file reading error. '

    # Free-text elements (PN / LO / SH / ST) can hold a placeholder string.
    # 'PhysiciansOfRecord' is the real DICOM keyword; the former spelling
    # 'PhysiciansofRecord' only set a plain Python attribute, so the element
    # in the file was left untouched.
    placeholder_keywords = (
        'PatientName',
        'OtherPatientIDs',
        'RequestingPhysician',
        'InstitutionName',
        'InstitutionAddress',
        'ReferringPhysicianName',
        'StationName',
        'PhysiciansOfRecord',
    )
    # Date (DA), code string (CS) and age string (AS) elements have a fixed
    # format, so 'Anonymized' is not a valid value for them; they are blanked.
    blanked_keywords = ('PatientBirthDate', 'PatientSex', 'PatientAge')
    # NOTE(review): PatientID is not modified here - confirm that is intended.

    try:
        for keyword in placeholder_keywords:
            setattr(dataset, keyword, 'Anonymized')
        for keyword in blanked_keywords:
            setattr(dataset, keyword, '')

        dataset.save_as(str(filename))

        # TODO - revive: sql_query(True)

    except Exception:
        # TODO - revive: sql_query(False)
        return 'de_identifier error'

multi_exe.py(ipynb 가능 : 주피터 노트북 가능)

import os
import pydicom
import time

# get dcm_file_list
def get_file_list():
    """Return absolute paths of every ``.dcm`` file under the working directory.

    Recursively scans the current working directory and keeps each file whose
    name ends in ``.dcm`` (case-sensitive).

    Returns:
        A list of normalized absolute path strings. If an ``OSError`` is raised
        while scanning, the string ``'get_file_list error.'`` is returned
        instead (legacy error contract kept for existing callers).
    """
    try:
        start_dir = os.getcwd()
        matches = []
        # '.' is portable; the previous root '.\\' only exists as a directory
        # spelling on Windows, so on other systems the walk found no files.
        for current_dir, _, file_names in os.walk('.'):
            for file_name in file_names:
                if file_name.endswith('.dcm'):
                    # normpath() removes the './' segments instead of the old
                    # replace('.\\', ''), which could alter unrelated parts of
                    # the path (e.g. a folder name ending in a dot).
                    full_path = os.path.join(start_dir, current_dir, file_name)
                    matches.append(os.path.normpath(full_path))
        return matches
    except OSError:
        return 'get_file_list error.'
        
    # de-identifier for multi
def de_identifier_for_multi(filename):
    """Replace identifying header elements of a single DICOM file and save it.

    Reads the file with pydicom, overwrites a fixed list of patient,
    physician and institution elements, then writes the dataset back to the
    same path (the original file is overwritten).

    Args:
        filename: Path of the DICOM file; it is converted with ``str()``
            before being passed to pydicom.

    Returns:
        ``None`` on success, ``'de_identifier // file reading error. '`` if the
        file cannot be read, or ``'de_identifier error'`` if replacing the
        elements or saving fails.
    """
    try:
        dataset = pydicom.filereader.dcmread(str(filename))
    except Exception:
        return 'de_identifier // file reading error. '

    # keyword -> replacement value.
    # Text-like elements receive the placeholder; PatientBirthDate (DA),
    # PatientSex (CS) and PatientAge (AS) have strict formats that
    # 'Anonymized' does not satisfy, so they are set to an empty value.
    # The keyword is 'PhysiciansOfRecord' (capital 'O'): the earlier spelling
    # 'PhysiciansofRecord' is not a DICOM keyword, so it never changed the
    # element stored in the file.
    replacements = {
        'PatientName': 'Anonymized',
        'PatientBirthDate': '',
        'PatientSex': '',
        'OtherPatientIDs': 'Anonymized',
        'PatientAge': '',
        'RequestingPhysician': 'Anonymized',
        'InstitutionName': 'Anonymized',
        'InstitutionAddress': 'Anonymized',
        'ReferringPhysicianName': 'Anonymized',
        'StationName': 'Anonymized',
        'PhysiciansOfRecord': 'Anonymized',
    }
    # NOTE(review): PatientID is not modified here - confirm that is intended.

    try:
        for keyword, value in replacements.items():
            setattr(dataset, keyword, value)

        dataset.save_as(str(filename))

        # TODO - revive

    except Exception:
        # TODO - revive
        return 'de_identifier error'

파일 두 개를 넣고 두 번째 파일을 실행하면 됨

댓글