최종 프로젝트 - 이미지 학습(1)

Listeria 2021. 7. 1. 05:07

초기 목표에는 없던 이미지 도용방지 / 닮은꼴 찾기 등을 추가하기로 하였다. 단순 성별구분 등이 아닌 특정인(ex. 연예인)을 정확하게 알려줄 수 있어야 하는데, 이러한 기능을 제공하는 API가 없으므로 여기에 사용될 모델을 직접 만들기로 하였다.

이미지 학습에 많이 사용되는 Convolutional Neural Network(이하 CNN)를 응용하기로 했고, 이를 위해서 Tensorflow를 활용하기로 했다.

우선은 해외 사이트를 통해서 해외 연예인들의 이미지를 다운받을 수 있었기에 우선 이를 활용해서 모델을 만들어 보기로 하였다.(참고 블로그 : https://bcho.tistory.com/1176)

처음 해보는 작업이라 데이터 수집, 가공, tensorflow 코드까지 위 블로그에 나온 내용을 무작정 따라 해보았다. 하지만 블로그에서 사용한 텐서플로우는 v1으로 직접 값들을 하나하나 설정해주어야 하는 단점이 있었으며, 결정적으로 원하는 수준의 학습결과가 나오질 않았다.(코드는 위 블로그에 있는 걸 거의 그대로 썼기에 따로 첨부하지 않음)

이후에는 텐서플로우 v1을 사용해서 결과가 원하는 대로 나오지 않는 건가 하는 생각이 들어 코드를 v2에 맞춰서 수정하였다. 이 역시 처음 해보는 작업이라 tensorflow 홈페이지와 많은 자료들을 참고하며 많은 시행착오를 거쳐서 진행하였다.

import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import cv2

import re
from sklearn.model_selection import train_test_split
# Accumulators filled by the data-loading code further down the script.
train_images = []
test_images = []
train_labels = []
test_labels = []
num_epochs = 30  # number of passes over the training set

# Allocate GPU memory on demand instead of reserving all of it up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth has to be configured identically on every visible
        # GPU, so enable it for all of them rather than only the first one.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Raised when the GPUs have already been initialised; this setting
        # must be applied at program start-up.
        print(e)


# --- Dataset loading ---------------------------------------------------------
IMAGE_SIZE = 96            # images are reshaped to IMAGE_SIZE x IMAGE_SIZE x 3
MAX_TRAIN_SAMPLES = 5601   # upper bound on the number of training samples used
MAX_TEST_SAMPLES = 1300    # upper bound on the number of validation samples used
GENDER_TO_CLASS = {'man': 0, 'woman': 1}


def _read_lines(path):
    """Return the stripped, non-empty lines of the text file at ``path``."""
    with open(path, 'r') as handle:
        return [line.strip() for line in handle if line.strip()]


def _read_image(index_line):
    """Load the image named in the first comma-separated column of a line.

    Raises:
        FileNotFoundError: if OpenCV cannot read the file.
    """
    path = index_line.split(',')[0]
    image = cv2.imread(path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('cannot read image: ' + path)
    return image


def _augment(image):
    """Apply the random flip / brightness / contrast / hue / saturation chain."""
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.5)
    image = tf.image.random_contrast(image, lower=0.2, upper=2.0)
    image = tf.image.random_hue(image, max_delta=0.08)
    image = tf.image.random_saturation(image, lower=0.2, upper=2.0)
    return image


def _gender_classes(words):
    """Map 'man'/'woman' strings to 0/1; other values are reported and skipped."""
    classes = []
    for word in words:
        if word in GENDER_TO_CLASS:
            classes.append(GENDER_TO_CLASS[word])
        else:
            print('skipping unknown gender label: ' + repr(word))
    return classes


# Training images (augmented) from both data sets, in index-file order.
for index_file in ('./newTrain01/training_file.txt',
                   './faceData01/training_file.txt'):
    train_images.extend(
        _augment(_read_image(line)) for line in _read_lines(index_file)
    )

# Textual labels ('man' / 'woman') for the training images.
train_labels.extend(_read_lines('./newTrain01/genderFace3.txt'))

# Validation images are used as-is, without augmentation.
for index_file in ('./newTrain01/validate_file.txt',
                   './faceData01/validate_file.txt'):
    test_images.extend(_read_image(line) for line in _read_lines(index_file))

test_labels.extend(_read_lines('./newTrain01/genderVali.txt'))

# Convert the textual labels collected so far into integer class ids.
test_labels = _gender_classes(test_labels)
train_labels = _gender_classes(train_labels)
print('test label : ' )
print(len(test_labels))
print('train label : ')
print(len(train_labels))

# These label files already contain integer class ids, one per line.
train_labels.extend(
    int(line) for line in _read_lines('./newTrain01/genderFace4.txt')
)
test_labels.extend(
    int(line) for line in _read_lines('./newTrain01/genderVali2.txt')
)

print('test label2 : ')
print(len(test_labels))
print('train label2 : ')
print(len(train_labels))

# Stack into arrays and scale pixel values to [0, 1].
x_train = np.array(train_images).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 3)
y_train = np.array(train_labels)
x_train = x_train / 255.0

x_test = np.array(test_images).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 3)
y_test = np.array(test_labels)
x_test = x_test / 255.0

print(len(x_test))
print(len(x_train))
print(len(y_test))
print(len(y_train))

# Keep images and labels the same length: truncate both to the shorter of the
# two, never exceeding the sample caps above.
n_train = min(len(x_train), len(y_train), MAX_TRAIN_SAMPLES)
n_test = min(len(x_test), len(y_test), MAX_TEST_SAMPLES)
x_train, y_train = x_train[:n_train], y_train[:n_train]
x_test, y_test = x_test[:n_test], y_test[:n_test]



# CNN classifier for 96x96 3-channel inputs: five Conv2D + MaxPool stages with
# growing filter counts, then dropout, two dense layers and a 2-way softmax.
model = tf.keras.models.Sequential([
    layers.Conv2D(36, kernel_size=3, activation='relu', input_shape=(96, 96, 3)),
    layers.MaxPool2D(pool_size=3, strides=2),
    layers.Conv2D(64, kernel_size=3, activation='relu', padding='same'),
    layers.MaxPool2D(pool_size=3, strides=2),
    layers.Conv2D(128, kernel_size=3, activation='relu', padding='same'),
    layers.MaxPool2D(pool_size=3, strides=2),
    layers.Conv2D(256, kernel_size=3, activation='relu', padding='same'),
    layers.MaxPool2D(pool_size=3, strides=2),
    layers.Conv2D(512, kernel_size=3, activation='relu', padding='same'),
    layers.MaxPool2D(pool_size=3, strides=2),
    layers.Flatten(),
    layers.Dropout(0.2),
    layers.Dense(512, activation='relu'),
    layers.Dense(512, activation='relu'),
    layers.Dense(2, activation='softmax'),
])
model.summary()

# The head is a 2-unit softmax and the labels are integer class ids (0 / 1),
# so the matching loss is sparse categorical cross-entropy; binary
# cross-entropy expects a single sigmoid unit (or one-hot targets).
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.optimizers.Adam(learning_rate=0.0001),
    metrics=['acc'],
)

# Train the model, validating on the held-out set after every epoch.
print('fitting')
hi = model.fit(
    x_train,
    y_train,
    batch_size=100,
    validation_data=(x_test, y_test),
    epochs=num_epochs,
)
print('evalutate')
acc = model.evaluate(x_test, y_test, verbose=2)  # verbose takes an int
print(acc)


def _load_face(path):
    """Load an image file as a (1, 96, 96, 3) batch shaped like the training data.

    The training arrays come from ``cv2.imread`` (BGR channel order) and are
    divided by 255, so the RGB image returned by Keras is channel-reversed and
    scaled the same way before being fed to the model.
    """
    picture = keras.preprocessing.image.load_img(path, target_size=(96, 96))
    pixels = keras.preprocessing.image.img_to_array(picture)
    pixels = pixels[..., ::-1] / 255.0  # RGB -> BGR, then scale to [0, 1]
    return np.expand_dims(pixels, 0)  # add the batch dimension


# Sanity check on two sample photos.
img_array = _load_face('Aaron.jpeg')
img_array2 = _load_face('jolie.jpeg')
predictions = model.predict(img_array)
predictions2 = model.predict(img_array2)
# The last layer is already a softmax, so the outputs are class probabilities;
# applying softmax a second time would flatten them towards 0.5.
score = predictions[0]
scor2 = predictions2[0]

print(np.max(score))
print(np.max(scor2))

하지만 결과적으로 이번에도 만족할만한 결과가 나오질 않았는데, 혹시 특징을 추출하는 신경망이 얕은건 아닐까 하는 생각이 들었다. 그래서 사람 개개인을 구별하는 것이 아닌 남/녀로 구분하는 모델을 만드는데 사용해 보았는데, 남/녀를 구분하는데는 아무런 문제가 없었다.

그래서 생각한 해결 방법은 학습에 필요한 이미지 수를 늘리든가, 혹은 이미지의 특징을 더 잘 뽑을 수 있도록 신경망의 깊이를 깊게 해야 한다는 2가지였다.

우선적으로 학습에 사용되는 사진의 품질과 양을 개선해보기로 하였다. 실제로 수집한 사진이 해상도도 낮은 경우가 많아 학습에 사용하기엔 적절치 않다는 생각이 들었다. 그래서 특정 사이트에 의존하는 것이 아닌 직접 구글 크롤링을 통해서 사진을 다운받기로 하였다.

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import ElementClickInterceptedException, NoSuchElementException, \
    ElementNotInteractableException
import time
import os
import urllib.request


def createFolder(directory):
    """Create ``directory`` (including missing parents) if it does not exist.

    Failures are reported on stdout rather than raised, so callers can treat
    this as best-effort.

    Args:
        directory: Path of the directory to create.
    """
    try:
        # exist_ok avoids the check-then-create race; it still raises when the
        # path exists but is not a directory, which is then reported below.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        print('Error: Creating directory. ' + directory)


keyword = '찾고싶은 사람 이름'  # placeholder: replace with the name to search for
# Directory the downloaded pictures are written to.
save_dir = './images/'
os.makedirs(save_dir, exist_ok=True)

chromedriver = './chromedriver'
driver = webdriver.Chrome(chromedriver)
driver.implicitly_wait(3)

# =============================================================================
# Open Google image search and submit the keyword
# =============================================================================
print(keyword, '검색')
driver.get('https://www.google.co.kr/imghp?hl=ko')

search_box = driver.find_element_by_xpath('//*[@id="sbtc"]/div/div[2]/input')
search_box.send_keys(keyword)

driver.find_element_by_xpath('//*[@id="sbtc"]/button').click()

# =============================================================================
# Scroll until no more results are loaded
# =============================================================================
more_results_xpath = '//*[@id="islmp"]/div/div/div/div/div/div/input'
scroll_count = 0

print("ㅡ 스크롤 다운 시작 ㅡ")

# Page height before the first scroll.
last_height = driver.execute_script("return document.body.scrollHeight")

# Whether the "show more results" button has been clicked already.
after_click = False

while True:
    print(f"ㅡ 스크롤 횟수: {scroll_count} ㅡ")
    # Scroll to the bottom and give the page time to load more thumbnails.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    scroll_count += 1
    time.sleep(1)

    new_height = driver.execute_script("return document.body.scrollHeight")

    # The page did not grow, so we are at the bottom of the current results.
    if last_height == new_height:

        # The button was clicked before and nothing new arrived: done.
        if after_click:
            print("ㅡ 스크롤 다운 종료 ㅡ")
            break

        # find_element_* raises when the element is absent, so the missing
        # button has to be handled with try/except rather than a truth test.
        try:
            more_button = driver.find_element_by_xpath(more_results_xpath)
        except NoSuchElementException:
            more_button = None

        if more_button is not None and more_button.is_displayed():
            more_button.click()
            after_click = True
        else:
            print("ㅡ NoSuchElementException ㅡ")
            print("ㅡ 스크롤 다운 종료 ㅡ")
            break

    last_height = new_height
# =============================================================================
# Collect the thumbnail URLs
# =============================================================================
links = []
images = driver.find_elements_by_css_selector("img.rg_i.Q4LuWd")
for image in images:
    src = image.get_attribute('src')
    if src is not None:
        links.append(src)

print(keyword + ' 찾은 이미지 개수:', len(links))
time.sleep(2)

# =============================================================================
# Download the images
# =============================================================================
for k, url in enumerate(links):
    start = time.time()
    target = os.path.join(save_dir, keyword + '_' + str(k) + '.jpg')
    try:
        urllib.request.urlretrieve(url, target)
    except (OSError, ValueError) as e:
        # URLError/HTTPError derive from OSError; a malformed URL raises
        # ValueError. Skip this picture instead of aborting the whole run.
        print(str(k + 1) + '/' + str(len(links)) + ' ' + keyword + ' 다운로드 실패: ' + str(e))
        continue
    print(str(k + 1) + '/' + str(len(links)) + ' ' + keyword + ' 다운로드 중....... Download time : ' + str(
        time.time() - start)[:5] + ' 초')
print(keyword + ' ---다운로드 완료---')

# NOTE(review): close() only closes the current window; consider quit() if the
# chromedriver process should be shut down as well.
driver.close()

from PIL import Image

# Delete downloaded pictures that are too small or cannot be opened.
print("ㅡ 필터링 시작 ㅡ")
filtered_count = 0
dir_name = "./images/"
for index, file_name in enumerate(os.listdir(dir_name)):
    file_path = os.path.join(dir_name, file_name)
    if not os.path.isfile(file_path):
        # Sub-directories are neither images nor removable with os.remove.
        continue

    try:
        # The context manager closes the file handle for every image, not
        # only for the ones that end up being deleted.
        with Image.open(file_path) as img:
            too_small = img.width <= 350 and img.height <= 350
    except OSError:
        # The image file is corrupt or not an image at all.
        os.remove(file_path)
        filtered_count += 1
        continue

    # Both width and height are at most 350 pixels.
    if too_small:
        os.remove(file_path)
        print(f"{index} 번째 사진 삭제")
        filtered_count += 1

위 코드는 구글링을 통해 찾은 코드인데, 해당 게시글에 나오는 걸 90퍼센트 이상 거의 그대로 사용한 것이지만, driver.find_element_by_xpath 에서 구글 이미지 경로가 살짝 바뀐 부분이 있어서 이 부분을 수정하였다.

얼굴을 인식하고 잘라내는 것을 기존에는 google API를 활용했었는데, 정확도는 굉장히 높았지만 인터넷이 필수이며 속도도 생각보다 느리다는 단점이 있었다. 그래서 OpenCV를 활용하기로 하였다.

def faceCrop(img_dir, save_dir):
    """Detect faces in every image under ``img_dir`` and save the crops.

    Each sub-directory of ``img_dir`` is processed in turn. Every face found
    in one of its images is cropped, with 1/14 of the detection box trimmed
    from each side, and written to ``save_dir/<subdir>/<subdir><n>.jpeg``,
    where ``n`` counts the faces saved for that sub-directory.

    Args:
        img_dir: Directory containing one sub-directory of images per person.
        save_dir: Directory that receives the cropped faces.

    Raises:
        FileNotFoundError: If the Haar cascade file could not be loaded.
    """
    # NOTE(review): the cascade shipped with OpenCV is named
    # 'haarcascade_frontalface_default.xml' - confirm this file name.
    face_cascade = cv2.CascadeClassifier('haarcascade_frontface.xml')
    if face_cascade.empty():
        # An empty classifier would only fail later, inside detectMultiScale.
        raise FileNotFoundError('cannot load cascade: haarcascade_frontface.xml')
    print(img_dir)
    print(save_dir)

    for line in os.listdir(img_dir):
        person_dir = img_dir + '/' + line
        if not os.path.isdir(person_dir):
            continue  # stray files next to the per-person directories
        i = 0  # number of faces saved so far for this person
        print(line)
        save_path = save_dir + '/' + line

        for img in os.listdir(person_dir):
            src = cv2.imread(person_dir + '/' + img)
            print('open : ' + img)
            if src is None:
                continue  # cv2.imread returns None for unreadable files
            src_gray = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(image=src_gray, scaleFactor=1.1, minNeighbors=3, minSize=(20, 20),
                                                  flags=None)

            for x, y, w, h in faces:
                cropped = src[y + int(h / 14):y + h - int(h / 14), x + int(w / 14):x + w - int(w / 14)]
                os.makedirs(save_path, exist_ok=True)
                cv2.imwrite(save_path + '/' + line + str(i) + '.jpeg', cropped)
                print('save :' + img + str(i))
                # Advance per saved face so that several faces found in one
                # image do not overwrite each other.
                i += 1

def main(argv):
    """Run the face cropper from command-line style arguments.

    Args:
        argv: Argument list in ``sys.argv`` layout: program name, source
            directory, destination directory.

    Raises:
        SystemExit: With a usage message when fewer than two directories
            are given.
    """
    if len(argv) < 3:
        prog = argv[0] if argv else 'facecrop'
        raise SystemExit('usage: ' + prog + ' <source_dir> <destination_dir>')

    srcdir = argv[1]
    desdir = argv[2]

    faceCrop(srcdir, desdir)


if __name__ == "__main__":
    # ``sys`` is not imported in the visible part of this file, so bring it
    # into scope here before reading the command-line arguments.
    import sys

    main(sys.argv)

처음에는 얼굴이 아닌 것을 인식한다든가 하는 문제가 있었지만 이는 face_cascade.detectMultiScale의 인자 값을 조정하면 해결할 수 있다.

이를 활용해서 구글 크롤링을 통해 받은 사진을 알아서 얼굴만 크롭하여 저장하였다. 학습에 필요한 데이터를 만든 것인데, 이렇게 해도 생각보다 양질의 데이터를 다량으로 확보하는데는 많은 어려움이 있었다.

- 2 에서 계속