'AI/[Preprocessing]' 카테고리의 글 목록

AI/[Preprocessing]

[전처리] 사진 여러장처럼 보이게만들기(Augmentation) 2023.06.16
[전처리] 이미지 비율에 맞게 정사각형 만들기 2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, xml) 2023.06.15 1
[전처리] 가져온 사진 정보 받기 (feat, json) 2023.06.14
[전처리] 폴더에 있는 사진 가져오기 2023.06.14
[전처리] 범주형 데이터 전처리 2023.06.06
[전처리] 손글씨 데이터 PCA 적용예시 2023.06.06 1

[전처리] 사진 여러장처럼 보이게만들기(Augmentation)

2023. 6. 16. 01:18

728x90

학습데이터를 변형시켜 마치 다른 학습데이터처럼 만들기 : 한장의 사진으로 여러장의 학습데이터를 만들 수 있다.

import cv2
import matplotlib.pyplot as plt
import numpy as np

# Load the sample picture used by every augmentation example below.
# cv2.imread does not raise when the file is missing or unreadable; it
# returns None, so fail early with a clear message instead of crashing
# later inside cvtColor.
image = cv2.imread("image02.jpeg")
if image is None:
    raise FileNotFoundError("could not read image: image02.jpeg")
# OpenCV decodes to BGR channel order; convert to RGB for matplotlib.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# #### rotation ####
# angle = 30

# h, w = image.shape[:2]
# center = (w//2, h//2)
# M = cv2.getRotationMatrix2D(center, angle, 1.0)
#     # getRotationMatrix2D(중심 좌표, 회전 각도, 크기 변환 비율)

# rotated_img = cv2.warpAffine(image, M, (w, h))
#                 # warpAffine(원본 이미지, 회전 행렬, 이미지 크기)

# plt.imshow(image)
# plt.show()

# plt.imshow(rotated_img)
# plt.show()
# #### rotation ####

#### zoom ####
# h, w = image.shape[:2]

# zoom_scale = 4 # 이미지 확대/축소 배율
# enlarged_img = cv2.resize(image, (w*zoom_scale, h*zoom_scale), interpolation=cv2.INTER_CUBIC)
#             # resize(원본 이미지, (최종 너비, 최종 높이), 이미지 보간 방법 (ex: cv2.INTER_CUBIC))
# center = [enlarged_img.shape[0] // 2, enlarged_img.shape[1] // 2]
# cut_half = 300
# zoomed_img = enlarged_img[center[0]-cut_half:center[0]+cut_half, center[1]-cut_half:center[1]+cut_half]
# plt.imshow(zoomed_img)
# plt.show()
#### zoom ####


#### shift ####

# shift = (0, 50)
# M = np.float32([
#     [1, 0, shift[0]],
#     [0, 1, shift[1]]
# ])
# # 이동 행렬: 좌측 2x2 -> 회전 행렬 (현재 단위행렬), 우측 1열: 이동 행렬 (x 변위, y 변위)

# shifted_img = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))
# plt.imshow(shifted_img)
# plt.show()
#### shift ####

#### flip ####
# flipped_img_updown = cv2.flip(image, 0) # 상하반전
# flipped_img_leftright = cv2.flip(image, 1) # 좌우반전
# flipped_img_lr_other = cv2.flip(image, -1) # 상하 & 좌우반전

# plt.imshow(image)
# plt.show()
# plt.imshow(flipped_img_updown)
# plt.show()
# plt.imshow(flipped_img_leftright)
# plt.show()
# plt.imshow(flipped_img_lr_other)
# plt.show()
#### flip ####

#### salt-and-pepper noise ####
# noise = np.zeros(image.shape, np.uint8) # uint8 = unsigned int 8-bit (부호 없는 1바이트 정수)
# cv2.randu(noise, 0, 255)
# black = noise < 30 # [True, True, False, False, False, ...] 형태의 Mask 생성
# white = noise > 225
# noise[black] = 0
# noise[white] = 255

# noise_b = noise[:, :, 0] # image.shape (h, w, c) -> h*w*c -> color channel : B, G, R
# noise_g = noise[:, :, 1]
# noise_r = noise[:, :, 2]
# noisy_img = cv2.merge([
#     cv2.add(image[:, :, 0], noise_b),
#     cv2.add(image[:, :, 1], noise_g),
#     cv2.add(image[:, :, 2], noise_r)
# ])

# plt.imshow(image)
# plt.show()
# plt.imshow(noisy_img)
# plt.show()
#### salt-and-pepper noise ####


#### Gaussian Noise ####
# Additive Gaussian noise: draw one sample per pixel/channel and add it to the image.
# mean = 0
# var = 100
# sigma = var ** 0.5

# gauss = np.random.normal(mean, sigma, image.shape)
# NOTE(review): with mean 0 about half of the samples are negative; casting
# them to uint8 cannot keep the sign, so the noise that is added below is not
# symmetric around zero. Consider adding in a float/signed dtype and clipping
# to [0, 255] before converting back - TODO confirm.
# gauss = gauss.astype('uint8')

# noisy_img = cv2.add(image, gauss)

# plt.imshow(noisy_img)
# plt.show()
#### Gaussian Noise ####


#### 색조 변경 ####
# RGB , HSV 
# hsv_img = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
# hue_shift = 30
# hsv_img[:, :, 0] = (hsv_img[:, :, 0] + hue_shift) % 180
# rgb_img = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2RGB)

# plt.imshow(image)
# plt.show()
# plt.imshow(rgb_img)
# plt.show()
#### 색조 변경 ####


#### Color conversion (hue / saturation / value) ####
# hsv_img = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

# # hsv[h, w, c]
# NOTE(review): the hue-shift example earlier in this file wraps with `% 180`;
# this in-place `+= 50` does not, and uint8 arithmetic wraps at 256 - TODO confirm.
# hsv_img[:, :, 0] += 50 # Hue -> shift by +50
# hsv_img[:, :, 1] = np.uint8(hsv_img[:, :, 1] * 0.5)  # saturation
# NOTE(review): inputs above 170 exceed 255 after * 1.5 and np.uint8() does not
# saturate; consider np.clip(..., 0, 255) before the cast - TODO confirm.
# hsv_img[:, :, 2] = np.uint8(hsv_img[:, :, 2] * 1.5)  # value (brightness)

# # imshow interprets the array as BGR / RGB, so convert back from HSV first
# rgb_img = cv2.cvtColor(hsv_img, cv2.COLOR_HSV2RGB)
# plt.imshow(rgb_img)
# plt.show()
#### Color conversion (hue / saturation / value) ####


#### 이미지 크롭 ####
# x, y, w, h = 300, 300, 200, 200  # (100, 100) 좌표에서 (200 * 200) 크기로 자를 것임
# crop_img_wide = image[y-h:y+h, x-w:x+w] # (x, y) 를 중심으로 2w, 2h 크기로 자름
# crop_img_lt = image[y:y+h, x:x+w] # (x, y) 를 기점으로 (w, h) 만큼 오른쪽 아래로 간 크기로 자름

# plt.imshow(image)
# plt.show()
# plt.imshow(crop_img_wide)
# plt.show()
# plt.imshow(crop_img_lt)
# plt.show()
#### 이미지 크롭 ####

#### warpAffine ####
# x_diff = 50
# y_diff = 100
# h, w, c = image.shape
# M = np.float32([
#     [1, 0, x_diff],
#     [0, 1, y_diff]
# ]) # x축으로 50, y 축으로 100 이동하는 병진이동행렬
# shifted_img = cv2.warpAffine(image, M, (w, h))

# M = cv2.getRotationMatrix2D((w // 2, h // 2), 45, 1.0)
# rotated_img = cv2.warpAffine(image, M, (w, h))

# M = cv2.getRotationMatrix2D((w // 2, h // 2), 0, 0.5)
# halfed_img = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_AREA) # 가장자리를 검은색으로 칠한, 원본 이미지 크기와 같은 축소 이미지
# croped_img = halfed_img[h//2 - h//4 : h//2 + h//4, 
#                         w//2 - w//4 : w//2 + w//4] # 가장자리를 잘라낸 이미지

# resized_img = cv2.resize(image, (w//2, h//2), interpolation=cv2.INTER_AREA)
# plt.imshow(image)
# plt.show()
# plt.imshow(shifted_img)
# plt.show()
# plt.imshow(rotated_img)
# plt.show()
# plt.imshow(resized_img)
# plt.show()
# plt.imshow(halfed_img)
# plt.show()
# plt.imshow(croped_img)
# plt.show()
#### warpAffine ####

#### blurring ####
# blur_img = cv2.GaussianBlur(image, (5, 5), 5)

# plt.imshow(blur_img)
# plt.show()
#### blurring ####

#### adaptive threshold ####
# img_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

# thresh = cv2.adaptiveThreshold(img_gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
# # ADAPTIVE_THRESH_MEAN_C: 적응형 임계값 처리, 임계값 기준을 평균치를 사용함
# # 인자 11: 블록 크기, 11x11 블록으로 이미지를 나눈 후 해당 영역
# plt.imshow(img_gray, 'gray')
# plt.show()
# plt.imshow(thresh, 'gray')
# plt.show()
#### adaptive threshold ####


#### 색온도 보정 ####
# org_img = image.copy()
# balance = [0.8, 0.7, 0.8]

# for i, value in enumerate(balance):
#     if value != 1.0:
#         org_img[:, :, i] = cv2.addWeighted(org_img[:,:,i], value, 0, 0, 0)
#                             # addWeighted: src에 대해 value만큼의 가중치로 색온도 조절

# plt.imshow(org_img)
# plt.show()
#### 색온도 보정 ####


#### 모션 블러 ####
# kernal_size = 15
# kernal_direction = np.zeros((kernal_size, kernal_size))
# kernal_direction[int((kernal_size)//2), :] = np.ones(kernal_size)
# kernal_direction /= kernal_size # 커널의 합이 1이 되도록
# kernal_matrix = cv2.getRotationMatrix2D((kernal_size/2, kernal_size/2), 45, 1)
# kernal = np.hstack((kernal_matrix[:, :2], [[0], [0]]))
#         # kernal_matrix[:, :2] <- 회전 행렬에서 병진이동 벡터를 제외하고 회전 행렬 값만 가져옴
#         # [[0],[0]] <- 병진이동 벡터 (이동 X)
# kernal = cv2.warpAffine(kernal_direction, kernal, (kernal_size, kernal_size))

# motion_blur_img = cv2.filter2D(image, -1, kernal)
# plt.imshow(motion_blur_img)
# plt.show()
#### 모션 블러 ####


#### 난수 노이즈 ####
# gray_img = cv2.imread('image02.jpeg', cv2.IMREAD_GRAYSCALE)
# h, w = gray_img.shape

# mean = 0
# var = 100
# sigma = var ** 0.5

# gaussian = np.random.normal(mean, sigma, (h, w))
# noisy_image = gray_img + gaussian.astype(np.uint8)
# # uint8 -> 0 ~ 255
# cv2.imshow("", noisy_image)
# cv2.waitKey()
#### 난수 노이즈 ####

#### 채도 조정 ####
# img = cv2.imread('image02.jpeg')
# org_img = img.copy()

# img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# saturation_factor = 1.5
# img_hsv[:, :, 1] = img_hsv[:, :, 1] * saturation_factor

# saturated_img = cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR)

# cv2.imshow("", org_img)
# cv2.waitKey()
# cv2.imshow("", saturated_img)
# cv2.waitKey()
#### 채도 조정 ####


#### 밝기 조정 ####
# img = cv2.imread('image02.jpeg')
# org_img = img.copy()

# bright_diff = 50
# img_brighten = cv2.convertScaleAbs(img, alpha=1, beta=bright_diff)

# cv2.imshow("org", org_img)
# cv2.imshow("brighten", img_brighten)
# cv2.waitKey()
#### 밝기 조정 ####

#### 노이즈 제거 ####
# img_filtered = cv2.medianBlur(image, 5)
# plt.imshow(image)
# plt.show()
# plt.imshow(img_filtered)
# plt.show()
#### 노이즈 제거 ####

#### 히스토그램 균일화 ####
# img_gray = cv2.imread("image02.jpeg", cv2.IMREAD_GRAYSCALE)
# img_equalized = cv2.equalizeHist(img_gray)

# cv2.imshow("org", img_gray)
# cv2.imshow("hist_equal", img_equalized)
# cv2.waitKey()
#### 히스토그램 균일화 ####

728x90

저작자표시 비영리 변경금지

'AI > [Preprocessing]' 카테고리의 다른 글

[전처리] 이미지 비율에 맞게 정사각형 만들기 (0)	2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, xml) (1)	2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, json) (0)	2023.06.14
[전처리] 폴더에 있는 사진 가져오기 (0)	2023.06.14
[전처리] 범주형 데이터 전처리 (0)	2023.06.06

Contents Creator 데이터 시대를 넘어 콘텐츠를 만끽하라 이미지 + 서버 + 네트워크를 활용한 콘텐츠 메이킹

[전처리] 이미지 비율에 맞게 정사각형 만들기

2023. 6. 15. 23:07

728x90

일반적으로 정사각형으로 만들면 화면비율이 깨짐

그런걸 방지하기 위해서 남는 부분은 다른 색으로 칠해버림

import matplotlib.pylab as plt
from PIL import Image

def expend2square(pil_img, background_color) :
    """Pad ``pil_img`` onto a square canvas filled with ``background_color``.

    The canvas side equals the longer edge of the image, and the image is
    centred along its shorter axis, so the aspect ratio is preserved.
    An image that is already square is returned unchanged (same object).
    """
    img_w, img_h = pil_img.size
    if img_w == img_h :
        return pil_img

    side = max(img_w, img_h)
    # Only the shorter axis ends up with a non-zero offset.
    offset = ((side - img_w) // 2, (side - img_h) // 2)

    canvas = Image.new(pil_img.mode, (side, side), background_color)
    canvas.paste(pil_img, offset)
    return canvas
        

def resize_with_padding(pil_img, new_size, background_color) :
    """Pad ``pil_img`` to a square with ``background_color`` and resize it.

    Args:
        pil_img: source PIL image.
        new_size: ``(width, height)`` of the result; only the first two
            items are used.
        background_color: fill colour for the padded area.

    Returns:
        The padded image resized to ``new_size``.
    """
    img = expend2square(pil_img, background_color)
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name.
    img = img.resize((new_size[0], new_size[1]), Image.LANCZOS)

    return img
    
    
    
    
img = Image.open("./image01.jpeg")
img_new = resize_with_padding(img, (300,300), (0,0,255))  # 300, 300 : 사진 크기  # 0,0,255 : RGB 

plt.imshow(img)
plt.show()

plt.imshow(img_new)
plt.show()

728x90

저작자표시 비영리 변경금지

'AI > [Preprocessing]' 카테고리의 다른 글

[전처리] 사진 여러장처럼 보이게만들기(Augmentation) (0)	2023.06.16
[전처리] 가져온 사진 정보 받기 (feat, xml) (1)	2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, json) (0)	2023.06.14
[전처리] 폴더에 있는 사진 가져오기 (0)	2023.06.14
[전처리] 범주형 데이터 전처리 (0)	2023.06.06

Contents Creator 데이터 시대를 넘어 콘텐츠를 만끽하라 이미지 + 서버 + 네트워크를 활용한 콘텐츠 메이킹

[전처리] 가져온 사진 정보 받기 (feat, xml)

2023. 6. 15. 22:38

728x90

XML은 Extensible Markup Language의 약어로, 데이터를 저장하고 전송하는 데 사용되는 마크업 언어입니다.

HTML과 매우 유사하지만, XML은 데이터와 문서 구조를 설명하는 데 중점을 둡니다.

XML은 텍스트 기반 형식이며, 데이터와 그 구조를 기술하는 데 사용할 수 있는 여러 가지 태그와 속성을 제공합니다. 데이터는 요소(element)라고도 부르는 태그를 사용하여 표시되며, 요소는 속성(attribute)과 값(value)을 가질 수 있습니다.

XML은 데이터를 계층 구조로 구성할 수 있으므로, 다양한 소프트웨어 시스템 간에 데이터를 교환하는 데 매우 유용합니다. 또한, XML은 데이터의 검증 및 검색이 용이하며, 대부분의 프로그래밍 언어에서 XML 파서를 지원하므로 데이터를 쉽게 처리할 수 있습니다.

import os 
import cv2
import matplotlib.pylab as plt
from xml.etree.ElementTree import parse

def xml_read(xml_path) :
    """Draw every annotated box on its image and display the result.

    Reads the annotation XML at ``xml_path``, and for each ``<image>``
    element loads the picture named by its ``name`` attribute from
    ``./data/``, draws each ``<box>`` with its label, and shows the image
    with matplotlib.

    Args:
        xml_path: path to the annotation XML file.

    Raises:
        FileNotFoundError: if an image listed in the XML cannot be read.
    """
    root = parse(xml_path).getroot()

    for image_elem in root.findall("image") :
        # e.g. <image id="0" name="01.jpg" width="640" height="480">
        image_path = os.path.join("./data/", image_elem.attrib['name'])

        # cv2.imread returns None (no exception) when the file cannot be read.
        image = cv2.imread(image_path)
        if image is None :
            raise FileNotFoundError(f"could not read image: {image_path}")
        # BGR -> RGB so matplotlib shows the right colours
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        for box_info in image_elem.findall('box') :
            label = box_info.attrib['label']

            # Attributes are strings such as "468.94": they cannot be passed
            # to int() directly, so convert through float first.
            xtl_i, ytl_i, xbr_i, ybr_i = (
                int(float(box_info.attrib[key]))
                for key in ('xtl', 'ytl', 'xbr', 'ybr')
            )

            print(xtl_i, ytl_i, xbr_i, ybr_i, label)

            # Bounding box
            image = cv2.rectangle(image, (xtl_i, ytl_i), (xbr_i, ybr_i), (0,255,0), 2)

            # Label text, 10 px above the top-left corner
            image = cv2.putText(image, label, (xtl_i, ytl_i-10),
                  cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2, cv2.LINE_AA)

        plt.imshow(image)
        plt.show()
    
xml_read("./data/annotations.xml")

xml to yolo format 변경하기

import os 
import cv2
import matplotlib.pylab as plt
from xml.etree.ElementTree import parse

# Class name -> YOLO class index
label_number_dict = {'cat':0, 'dog':1}

def xml_read(xml_path) :
    """Convert Pascal-style (xtl, ytl, xbr, ybr) boxes to YOLO label files.

    For every ``<image>`` element in the XML at ``xml_path`` one text file
    named after the image (extension removed) is written to the current
    working directory. Each line is
    ``class center_x center_y width height`` with all coordinates
    normalised by the image ``width``/``height`` attributes.

    The image file itself is not opened: everything needed is in the XML.
    The label file is rewritten on each call, so running the conversion
    twice does not duplicate lines. Images without boxes produce no file.

    Args:
        xml_path: path to the annotation XML file.

    Raises:
        KeyError: if a box label is not in ``label_number_dict``.
    """
    root = parse(xml_path).getroot()

    for image_elem in root.findall("image") :
        # e.g. <image id="0" name="01.jpg" width="640" height="480">
        # width/height attributes are strings and must be converted
        img_width_i = int(image_elem.attrib['width'])
        img_height_i = int(image_elem.attrib['height'])

        # The label file shares the image's base name; splitext handles
        # any extension, not only ".jpg".
        image_name_temp = os.path.splitext(image_elem.attrib['name'])[0]

        yolo_lines = []
        for box_info in image_elem.findall('box') :
            # label cat, dog -> 0, 1
            label_number = label_number_dict[box_info.attrib['label']]

            # Values look like "468.94": go through float before int
            x1_i, y1_i, x2_i, y2_i = (
                int(float(box_info.attrib[key]))
                for key in ('xtl', 'ytl', 'xbr', 'ybr')
            )

            # Pascal xyxy -> YOLO center_x center_y w h (normalised)
            center_x = ((x2_i + x1_i)/(2*img_width_i))
            center_y = ((y2_i + y1_i)/(2*img_height_i))
            yolo_w = (x2_i - x1_i)/img_width_i
            yolo_h = (y2_i - y1_i)/img_height_i

            print("Pascal_xyxy to YOLO >> ", label_number,center_x, center_y, yolo_w, yolo_h)

            yolo_lines.append(f"{label_number} {center_x} {center_y} {yolo_w} {yolo_h} \n")

        # One write per image, overwriting any earlier output
        if yolo_lines :
            with open(f"{image_name_temp}.txt", "w") as f:
                f.writelines(yolo_lines)
    
xml_read("./data/annotations.xml")

Pascal_xyxy to YOLO >> 1 / 0.865625 / 0.45 / 0.26875 / 0.5166666666666667

Pascal_xyxy to YOLO >> 0 / 0.16171875 / 0.6041666666666666 / 0.3140625 / 0.44583333333333336

728x90

저작자표시 비영리 변경금지

'AI > [Preprocessing]' 카테고리의 다른 글

[전처리] 사진 여러장처럼 보이게만들기(Augmentation) (0)	2023.06.16
[전처리] 이미지 비율에 맞게 정사각형 만들기 (0)	2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, json) (0)	2023.06.14
[전처리] 폴더에 있는 사진 가져오기 (0)	2023.06.14
[전처리] 범주형 데이터 전처리 (0)	2023.06.06

Contents Creator 데이터 시대를 넘어 콘텐츠를 만끽하라 이미지 + 서버 + 네트워크를 활용한 콘텐츠 메이킹

[전처리] 가져온 사진 정보 받기 (feat, json)

2023. 6. 14. 16:17

728x90

JSON(JavaScript Object Notation)은 경량의 데이터 교환 형식 입니다.

JSON은 사람이 읽고 쓰기에 용이하고, 기계가 분석하고 생성 하기도 쉬워 많이 사용됩니다.

특히 웹에서 데이터를 주고받을 때 많이 사용되며, 대부분의 프로그래밍 언어에서 JSON 형식을 다룰 수 있습니다.

JSON은 키-값 쌍으로 이루어진 데이터 오브젝트를 저장합니다. 키(key)는 문자열이며, 값(value)은 문자열, 숫자, 불리언, 배열, 객체 등이 될 수 있습니다. 이러한 데이터 오브젝트들은 중첩될 수 있어 복잡한 데이터도 표현할 수 있습니다.

import json

# json 파일을 가져와야합니다.
json_data_path ="./data/instances_default.json"

with open(json_data_path, 'r', encoding="utf-8") as j :  # 'r' read
    json_data = json.load(j)
    
print(f"json type : {type(json_data)}")
print("json_data : ", json_data )  # 키, 밸류로 반환

다차원 딕셔너리 형태라 원하는 정보를 위해선 한차원 더 들어가야합니다.

# 변수 선언
categories_info = json_data['categories']
# [{'id': 1, 'name': 'cat', 'supercategory': ''}, {'id': 2, 'name': 'dog', 'supercategory': ''}]

images_info = json_data['images']
# [{'id': 1, 'width': 640, 'height': 480, 'file_name': '01.jpg', 'license': 0, 'flickr_url': '', 'coco_url': '', 'date_captured': 0}]

위의 정보를 이용하여 이미지 시각화 실습

import json 
import os
import cv2
import matplotlib.pylab as plt

# Path to the COCO-style annotation file
json_path = "./data/instances_default.json"

# Load the annotations
with open(json_path, 'r', encoding='utf-8') as j : # 'r' : read
    json_data = json.load(j)

category_info = json_data['categories']
images_info = json_data['images']
annotations_info = json_data['annotations']

# Category id -> name, built from the file's own category list rather than
# hard-coded, so it follows whatever classes the dataset declares.
# e.g. {1: "cat", 2: "dog"}
label_dict = {category['id']: category['name'] for category in category_info}

for image_json in images_info :
    print(image_json)
    # {'id': 1, 'width': 640, 'height': 480, 'file_name': '01.jpg', 'license': 0, 'flickr_url': '', 'coco_url': '', 'date_captured': 0}

    file_name = image_json['file_name']
    image_path = os.path.join("./data/", file_name)
    image_id = image_json['id']
    print(image_path)
    # ./data/01.jpg

    # cv2.imread returns None (no exception) when the file cannot be read.
    image = cv2.imread(image_path)
    if image is None :
        raise FileNotFoundError(f"could not read image: {image_path}")
    # BGR -> RGB so matplotlib shows the right colours
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Draw every annotation that belongs to this image
    for anno_json in annotations_info :
        if image_id != anno_json['image_id'] :
            continue

        # bbox is [x, y, width, height]; cv2 drawing needs int coordinates
        x, y, w, h = (int(value) for value in anno_json['bbox'][:4])
        print("bbox 좌표 >> " , x, y, w, h)
        # bbox 좌표 >>  468 92 171 248
        # bbox 좌표 >>  3 183 200 214

        # Bounding box
        cv2.rectangle(image, (x,y), (x+w, y+h), (0,255,0),2)

        # Label text
        category_id = anno_json['category_id']
        label_name = label_dict[category_id]
        print(label_name)
        # dog
        # cat

        image = cv2.putText(image, label_name, (x, y-10),  # -10 puts the text above the box
              cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2, cv2.LINE_AA)

    plt.imshow(image)
    plt.show()

json format -> Yolo format 으로 변경하고 텍스트 파일로 저장하기

import json 
import os
import cv2
import matplotlib.pylab as plt

# Path to the COCO-style annotation file
json_path = "./data/instances_default.json"

# Load the annotations
with open(json_path, 'r', encoding='utf-8') as j :
    json_data = json.load(j)

category_info = json_data['categories']
images_info = json_data['images']
annotations_info = json_data['annotations']

# COCO category id -> YOLO class index
# 0 -> cat , 1 -> dog
label_dict = {1: 0, 2: 1}

for image_json in images_info :
    print(image_json)
    file_name = image_json['file_name']
    image_path = os.path.join("./data/", file_name)
    image_id = image_json['id']
    print(image_path)
    print("file name ", file_name)

    # The image size is recorded in the annotation file, so the picture
    # does not have to be decoded just to normalise coordinates.
    img_width = image_json['width']
    img_height = image_json['height']

    # The label file must share the image's base name ("01.jpg" -> "01").
    # splitext handles any extension, not only ".jpg".
    file_name_temp = os.path.splitext(file_name)[0]

    yolo_lines = []
    for anno_json in annotations_info :
        # Only annotations of the current image; the file write used to sit
        # outside this check and ran for unrelated annotations as well.
        if image_id != anno_json['image_id'] :
            continue

        # bbox is [x, y, width, height]
        x, y, w, h = (int(value) for value in anno_json['bbox'][:4])

        category_id = anno_json['category_id']
        label_number = label_dict[category_id]

        # xywh -> normalised center_x, center_y, w, h
        center_x = ((2*x + w)/(2*img_width))
        center_y = ((2*y + h)/(2*img_height))
        yolo_w = w/img_width
        yolo_h = h/img_height

        print("yolo 좌표 변경 값 >> ",label_number,center_x, center_y, yolo_w, yolo_h)
        yolo_lines.append(f"{label_number} {center_x} {center_y} {yolo_w} {yolo_h} \n")

    # One write per image. "w" replaces earlier output so re-running the
    # script does not duplicate lines ("a" would append to the old file).
    if yolo_lines :
        with open(f"{file_name_temp}.txt", "w") as f:
            f.writelines(yolo_lines)

728x90

저작자표시 비영리 변경금지

'AI > [Preprocessing]' 카테고리의 다른 글

[전처리] 이미지 비율에 맞게 정사각형 만들기 (0)	2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, xml) (1)	2023.06.15
[전처리] 폴더에 있는 사진 가져오기 (0)	2023.06.14
[전처리] 범주형 데이터 전처리 (0)	2023.06.06
[전처리] 손글씨 데이터 PCA 적용예시 (1)	2023.06.06

Contents Creator 데이터 시대를 넘어 콘텐츠를 만끽하라 이미지 + 서버 + 네트워크를 활용한 콘텐츠 메이킹

[전처리] 폴더에 있는 사진 가져오기

2023. 6. 14. 16:16

728x90

학습을 위해 대량의 사진을 가져오기 위한 코드

일반적으로 사진을 그냥 막 가져와버리면 정렬이 제대로 안됨

우선 사과라는 폴더안에 있는 사진들을 가져오고 싶을때

# os listdir
import os
# 이미지가 저장된 디렉토리 경로
img_dir = './사과/'

# 디렉토리 내 모든 파일 목록 가져오기
file_list = os.listdir(img_dir)
print(file_list)

# 단점 정렬 되지 않습니다.

sorted를 쓰면 문자열 사전순으로 정렬되므로, 1.jpg, 10.jpg, 2.jpg 처럼 숫자 순서와는 다르게 나올 수 있음

# 만약 정렬 하고 싶다면 sort 함수 사용 
# os listdir
import os
# 이미지가 저장된 디렉토리 경로
img_dir = './사과/'

# 디렉토리 내 모든 파일 목록 가져오기
file_list = sorted(os.listdir(img_dir))
print(file_list)

import glob
import os 

# glob.glob returns matches in arbitrary (filesystem-dependent) order, so
# sort explicitly to get a reproducible list.
file_list = sorted(glob.glob(os.path.join("./사과/", "*.jpg")))
print(file_list)

# NOTE: sorted() is lexicographic ("10.jpg" comes before "2.jpg");
# use natsort.natsorted when numeric order is needed.

폴더안의 폴더도 for문으로 가능함

#### os.walk 를 이용한 폴더에서 이미지 파일 가져오기 함수 구현

os.walk() 하위의 폴더들을 for문으로 탐색할 수 있게 해줍니다. 인자로 전달된 path에 대해서 다음 3개의 값이 있는 tuple을 넘겨줍니다.

- root : dir과 files가 있는 path

- dirs : root 아래에 있는 폴더들

- files : root 아래에 있는 파일들

def get_img_paths(root_path): # searches every sub-directory as well
    """Return the paths of all image files found under ``root_path``.

    The tree is walked recursively with ``os.walk``. A file counts as an
    image when its extension (case-insensitive) is one of
    .bmp, .jpg, .jpeg, .png, .tif, .tiff or .dng.

    Args:
        root_path: directory to search.

    Returns:
        A list of paths in the order ``os.walk`` yields them (not sorted);
        empty when nothing matches or ``root_path`` does not exist.
    """
    # Built once instead of once per file.
    formats = ('.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng')
    file_paths = []
    # `_dirs` is unused; the original name `dir` shadowed the builtin.
    for path, _dirs, files in os.walk(root_path):
        for file in files:
            ext = os.path.splitext(file)[-1].lower()
            if ext in formats:
                file_paths.append(os.path.join(path, file))
    return file_paths

file_paths = get_img_paths("./사과/")
print(file_paths)

# For natural (numeric-aware) ordering use natsort.natsorted (needs `import natsort`)
# file_paths_sort = natsort.natsorted(file_paths)
# print(file_paths_sort)

728x90

저작자표시 비영리 변경금지

'AI > [Preprocessing]' 카테고리의 다른 글

[전처리] 이미지 비율에 맞게 정사각형 만들기 (0)	2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, xml) (1)	2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, json) (0)	2023.06.14
[전처리] 범주형 데이터 전처리 (0)	2023.06.06
[전처리] 손글씨 데이터 PCA 적용예시 (1)	2023.06.06

Contents Creator 데이터 시대를 넘어 콘텐츠를 만끽하라 이미지 + 서버 + 네트워크를 활용한 콘텐츠 메이킹

[전처리] 범주형 데이터 전처리

2023. 6. 6. 01:00

728x90

1. 순서가 없는 범주형 데이터

import numpy as np 
from sklearn.preprocessing import LabelBinarizer, MultiLabelBinarizer

feature = np.array((['가나다라'],
                    ['가나다라'],
                    ['아바하자'],
                    ['카나다사']))

print(feature)

# 원-핫 인코더 생성
one_hot = LabelBinarizer()
one_hot.fit_transform(feature) # 특성을 원-핫 인코딩 변환 
# 특성 클래스 확인 
print(one_hot.classes_)

"""
[['가나다라']
 ['가나다라']
 ['아바하자']
 ['카나다사']]
['가나다라' '아바하자' '카나다사']
"""

2. 다중 클래스 특성에 대한 처리

multiclass_feature = [('가나다라마','아자바하나'),
                     ('자다가나라','자다나타하'),
                     ('가나다라마','아자바하나'),
                     ('아마자나가','아카나다하'),
                     ('가나다라마','아자바하나'),
                     ]
one_hot_mult = MultiLabelBinarizer()
one_hot_mult.fit_transform(multiclass_feature)
print(one_hot_mult.classes_)

# ['가나다라마' '아마자나가' '아자바하나' '아카나다하' '자다가나라' '자다나타하']

3. 문자 타깃 데이터 원-핫 인코딩

from sklearn.preprocessing import OneHotEncoder

# Sample rows: one string column and one integer column
str_feature = ([['안녕', 1],
                ['저녁', 2],
                ['안녕', 1],
                ['점심', 3],
               ])

# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and the old
# keyword was removed in 1.4; sparse_output=False returns a dense array.
one_hot_encoder = OneHotEncoder(sparse_output=False)

# OneHotEncoder treats every input column as categorical
one_hot_encoder.fit_transform(str_feature)
print(one_hot_encoder.categories_)

# [array(['안녕', '저녁', '점심'], dtype=object), array([1, 2, 3], dtype=object)]

4. 순서가 있는 범주형 특성 인코딩

- 순서가 있는 클래스는 순서 개념을 가진 수치값으로 변환
- 딕셔너리 사용해서 -> 특성

import pandas as pd 
# Build the sample feature column
dataframe = pd.DataFrame({
    'Score' : ["Low", "Low", "Medium", "Medium", "High"]
})

print(dataframe)

"""
    Score
0     Low
1     Low
2  Medium
3  Medium
4    High
"""


# Mapping from ordered class name to its numeric rank
scale_mapper = {
    "Low" : 1,
    "Medium" : 2,
    "High" : 3
}

print(scale_mapper)
# {'Low': 1, 'Medium': 2, 'High': 3}

# Series.map applies the dictionary directly and yields an integer column.
# Series.replace on an object column relies on implicit downcasting, which
# newer pandas versions deprecate.
data = dataframe["Score"].map(scale_mapper)
print(data)
"""
0    1
1    1
2    2
3    2
4    3
Name: Score, dtype: int64
"""

4-2. 순서가 있는 범주형 특성 인코딩

from sklearn.preprocessing import OrdinalEncoder

feature_array = np.array((['Low', 10],
                    ['High', 40],
                    ['Medium',3],))

ordinal_encoder = OrdinalEncoder() 
ordinal_encoder.fit_transform(feature_array)
print(ordinal_encoder.categories_)
# [array(['High', 'Low', 'Medium'], dtype='<U21'), array(['10', '3', '40'], dtype='<U21')]

4-3. 순서가 있는 범주형 특성 인코딩

- 특성 딕셔너리 인코딩

from sklearn.feature_extraction import DictVectorizer
# Sample data: one dict of feature -> value per row
data_dict =[{"Red" : 2 , "Blue" : 4},
            {"Red" : 4 , "Blue" : 3},
            {"Red" : 1 , "Yellow" : 2 },
            {"Red" : 1 , "Yellow" : 2}]


# sparse=False returns a dense array
dictVectorizer = DictVectorizer(sparse=False)

feature_dict = dictVectorizer.fit_transform(data_dict)
print(feature_dict)

# get_feature_names() was removed in scikit-learn 1.2;
# get_feature_names_out() replaces it and returns an ndarray, so convert to
# a plain list to keep the printed form ['Blue', 'Red', 'Yellow'].
feature_dict_name = dictVectorizer.get_feature_names_out().tolist()
print(feature_dict_name)

dict_data = pd.DataFrame(feature_dict,  columns=feature_dict_name)
print(dict_data)


"""
[[4. 2. 0.]
 [3. 4. 0.]
 [0. 1. 2.]
 [0. 1. 2.]]
['Blue', 'Red', 'Yellow']
   Blue  Red  Yellow
0   4.0  2.0     0.0
1   3.0  4.0     0.0
2   0.0  1.0     2.0
3   0.0  1.0     2.0
"""

범주형 데이터 - 누락된 클래스값 대처하기 1

- knn으로 주변 그룹을 활용하여 nan의 값 예측함

from sklearn.neighbors import KNeighborsClassifier
x = np.array([[0, 2.10, 1.48],
             [1,1.18,1.33],
             [0,1.22,1.27],
             [1, -0.20, -1.15]])

x_with_nan = np.array([[np.nan, 0.87, 1.33], [np.nan, -0.67, -0.22]]) # 일부러 nan 생성

clf = KNeighborsClassifier(3, weights='distance')

print(x[:,1:])
print(x[:,0])

"""
[[  nan  0.87  1.33]
 [  nan -0.67 -0.22]]
[[ 2.1   1.48]
 [ 1.18  1.33]
 [ 1.22  1.27]
 [-0.2  -1.15]]
[0. 1. 0. 1.]
"""

train_model = clf.fit(x[:,1:], x[:,0])
imputer_values = train_model.predict(x_with_nan[:,1:]) # 누락된 값의 클래스 예측 

x_with_imputer = np.hstack((imputer_values.reshape(-1,1), x_with_nan[:,1:]))
data = np.vstack((x_with_imputer, x)) # 두 특성 행렬을 연결 
print(data)
"""
[[ 0.    0.87  1.33]
 [ 1.   -0.67 -0.22]
 [ 0.    2.1   1.48]
 [ 1.    1.18  1.33]
 [ 0.    1.22  1.27]
 [ 1.   -0.2  -1.15]]
 """

근처의 값을 비교해서 주변 그룹에 많이 있는 라벨을 따라감

범주형 데이터 - 누락된 클래스값 대처하기 2

- 누락된 값을 특성에서 가장 자주 등장하는 값으로 채우기

from sklearn.impute import SimpleImputer
x_complete = np.vstack((x_with_nan, x))
print("전")
print(x_complete)

impute = SimpleImputer(strategy='most_frequent')
data_impute = impute.fit_transform(x_complete)
print("후")
print(data_impute)
"""
전
[[  nan  0.87  1.33]
 [  nan -0.67 -0.22]
 [ 0.    2.1   1.48]
 [ 1.    1.18  1.33]
 [ 0.    1.22  1.27]
 [ 1.   -0.2  -1.15]]
후
[[ 0.    0.87  1.33]
 [ 0.   -0.67 -0.22]
 [ 0.    2.1   1.48]
 [ 1.    1.18  1.33]
 [ 0.    1.22  1.27]
 [ 1.   -0.2  -1.15]]
 """

728x90

저작자표시 비영리 변경금지

'AI > [Preprocessing]' 카테고리의 다른 글

[전처리] 이미지 비율에 맞게 정사각형 만들기 (0)	2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, xml) (1)	2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, json) (0)	2023.06.14
[전처리] 폴더에 있는 사진 가져오기 (0)	2023.06.14
[전처리] 손글씨 데이터 PCA 적용예시 (1)	2023.06.06

Contents Creator 데이터 시대를 넘어 콘텐츠를 만끽하라 이미지 + 서버 + 네트워크를 활용한 콘텐츠 메이킹

PCA 클래스: Scikit-learn의 decomposition 모듈에서 제공되는 클래스 중 하나입니다. PCA는 데이터셋의 차원을 감소시키는 기술로, 데이터셋에서 가장 중요한 특성만 추출하여 새로운 차원 축으로 변환합니다. 이를 통해 데이터셋의 노이즈(noise)를 제거하고, 더욱 빠르고 효율적인 학습이 가능해집니다.

n_components: PCA 클래스의 인자 중 하나로, 추출할 주성분(principal component)의 수를 지정합니다. 여기서는 99%의 분산(variance)을 유지하도록 설정되어 있습니다. 이는 데이터셋에서 99%의 정보가 유지되도록 차원을 축소하는 것을 의미합니다.

whiten: PCA 클래스의 인자 중 하나로, True로 설정할 경우 PCA의 결과로 나오는 주성분들이 서로 독립적인 값이 되도록 백색화(whitening)를 수행합니다. 백색화를 하면 각 주성분의 분산이 1이 되고, 상관 관계가 없는 성분들로 구성된 새로운 특성 공간이 만들어집니다.

fit_transform(): PCA 클래스에는 fit()과 transform() 메서드가 있습니다. fit() 메서드는 PCA 모델을 학습하고, transform() 메서드는 학습된 모델을 사용하여 데이터를 변환합니다. fit_transform() 메서드는 이 두 단계를 한 번에 수행합니다.

위의 같이 PCA이용하면 99%의 분산을 유지하도록 새로운 특성(feature) 공간으로 변환하고 있습니다. 결과적으로, 원본 데이터셋의 특성 개수는 features.shape[1]으로 확인할 수 있고, PCA를 수행하여 감소된 특성 개수는 features_pca.shape[1]으로 확인할 수 있습니다. 이렇게 차원 축소를 수행하면, 모델의 학습 시간을 단축시키고, 과적합(overfitting)을 방지할 수 있습니다.

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import datasets

digits = datasets.load_digits() # 8x8 크기의 손글씨 숫자 데이터 로드 
feature = StandardScaler().fit_transform(digits.data) # 특성 행렬을 표준화 처리 

print(feature)
 [[ 0.         -0.33501649 -0.04308102 ... -1.14664746 -0.5056698
  -0.19600752]
 [ 0.         -0.33501649 -1.09493684 ...  0.54856067 -0.5056698
  -0.19600752]
 [ 0.         -0.33501649 -1.09493684 ...  1.56568555  1.6951369
  -0.19600752]
 ...
 [ 0.         -0.33501649 -0.88456568 ... -0.12952258 -0.5056698
  -0.19600752]
 [ 0.         -0.33501649 -0.67419451 ...  0.8876023  -0.5056698
  -0.19600752]
 [ 0.         -0.33501649  1.00877481 ...  0.8876023  -0.26113572
  -0.19600752]]

pca = PCA(n_components=0.99, whiten=True)  # 99% 분산을 유지하도록 PCA객체 생성 / whiten=True 평균을 0, 표준편차를 1로변경
features_pca = pca.fit_transform(feature) # PCA 수행

print("원본 특성 개수 >> " , feature.shape[1])
print("줄어든 특성 개수 >> " , features_pca.shape[1])

# 원본 특성 개수 >>  64
# 줄어든 특성 개수 >>  54

1. StandardScaler()
>> Scikit-learn의 전처리(preprocessing) 모듈에서 제공되는 클래스 중 하나입니다. 이 클래스는 각 특성을 평균이 0, 분산이 1이 되도록 표준화합니다. 값의 스케일만 바꾸는 것이며, 분포의 모양 자체를 가우시안 정규 분포로 바꾸는 것은 아닙니다.

2. digits.data : digits 데이터셋에서 숫자 이미지의 각 픽셀 값을 포함하는 배열

3. fit_transform() 메서드
>> StandardScaler 클래스에는 데이터를 변환하는 두 가지 단계가 있습니다. 첫째, 모델을 학습(fit)하고, 둘째, 학습된 모델을 사용하여 데이터를 변환(transform)합니다. fit_transform() 메서드는 이 두 단계를 한 번에 수행합니다. 즉, 데이터를 표준화(standardize)하고, 변환된 값을 반환합니다.

따라서 위의 코드는 digits 데이터셋의 특성을 평균 0, 분산 1로 표준화한 후, 변환된 값을 feature 변수에 할당합니다. 이렇게 표준화를 수행하면, 모델이 데이터를 더 잘 이해하고, 모델의 예측 성능을 향상시킬 수 있습니다.

결과값 비교

1. 데이터불러오기

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

digits = load_digits()

# 진짜 차이가 있는지 체크 하기 위해서 -> 정규화 하지 않은 데이터로 분류 모델 훈련
print(len(digits.data))   
print(len(digits.target))
x_train, x_test, y_train, y_test = train_test_split(digits.data, digits.target , random_state=777)

print(len(x_train), len(x_test))
print(len(y_train), len(y_test))


"""
8 : 2 
2 -> 1 : 1 
8 : 1 : 1


1. 폴더 생성 (이미지 100개)
2. 폴더 읽고 -> train val test 나눠서 폴더 생성해서 거기에 이미지 저장
"""

# 1797
# 1797
# 1347 450
# 1347 450

2. 모델불러오기

model = LogisticRegression(max_iter=10000)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)

no_standardScaler_acc_score = accuracy_score(y_test, y_pred) # 정답지 예측치
print(no_standardScaler_acc_score)

# 0.9533333333333334

3. StandardScaler 적용후 -> ACC

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_train_norm = scaler.fit_transform(x_train)
x_test_norm = scaler.transform(x_test)
model_norm = LogisticRegression(max_iter=10000)
model_norm.fit(x_train_norm, y_train)
y_pred_norm = model_norm.predict(x_test_norm)

standardScale_acc_score = accuracy_score(y_test, y_pred_norm)
print(standardScale_acc_score)

# 0.9555555555555556 값 상승함

728x90

저작자표시 비영리 변경금지

'AI > [Preprocessing]' 카테고리의 다른 글

[전처리] 이미지 비율에 맞게 정사각형 만들기 (0)	2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, xml) (1)	2023.06.15
[전처리] 가져온 사진 정보 받기 (feat, json) (0)	2023.06.14
[전처리] 폴더에 있는 사진 가져오기 (0)	2023.06.14
[전처리] 범주형 데이터 전처리 (0)	2023.06.06

Contents Creator 데이터 시대를 넘어 콘텐츠를 만끽하라 이미지 + 서버 + 네트워크를 활용한 콘텐츠 메이킹

PREV 이전 1 NEXT 다음

내 블로그 - 관리자 홈 전환	`Q` `Q`
새 글 쓰기	`W` `W`

글 수정 (권한 있는 경우)	`E` `E`
댓글 영역으로 이동	`C` `C`

이 페이지의 URL 복사	`S` `S`
맨 위로 이동	`T` `T`
티스토리 홈 이동	`H` `H`
단축키 안내	`Shift` + `/` `⇧` + `/`

Contents Creator