지렁이 게임 AI 학습시키기

목표

1. 지렁이 게임을 만들자.

2. 학습시킬 수 있도록 수정하자.

3. 학습시키자.

3단계로 나눠서 진행했다.

1. 지렁이게임 만들기

1차 목표인 지렁이게임을 만들어보자

지렁이게임은 slither.io 처럼 만드는게 아니다.

모두가 알만한 옛날 고전게임인 지렁이게임처럼 만들거다.

목표로 하는 아이템에 도달하기위해 이동을 하고, 아이템을 먹으면 꼬리가 하나씩 길어지는 게임.

처음 생각한 건 한 번 죽을 때마다 맵의 사이즈가 10x10 ~ 30x30 사이의 랜덤한 값으로 바뀌도록 게임을 설정하는 것이었으나

학습시키는데 시간과 정확도가 엄청 차이가 날 것 같아서 이부분의 코드는 뺐다.

스크립트

Player

using System.Collections;
using System.Collections.Generic;
using UnityEngine;


/// <summary>
/// Keyboard-controlled snake for the classic grid "snake" game.
/// Reads W/A/S/D every frame, advances the head one cell each time the
/// speed-scaled timer passes <see cref="MoveInterval"/>, drags the tail
/// segments behind it, grows when an item is eaten and resets the game when
/// the head lands on a tail segment.
/// </summary>
public class Player : MonoBehaviour
{
    // Speed-scaled time that must accumulate before the snake advances one cell.
    private const float MoveInterval = 0.5f;

    [Header("이동")]
    public Vector3 startPos;
    public Vector3 nextPos;    // Cell the head will occupy after the next step

    // w-0, a-1, s-2, d-3
    public int curDir = 1;      // Direction currently being travelled (fixed during a step)
    public int inputDir = 1;     // Direction requested by the latest key press

    public float moveTime;
    public float moveSpeed;

    [Space(20f)]
    [Header("지렁이")]
    public Transform head;
    public List<Transform> tails;
    public int startTailCount = 3;

    [Space(20f)]
    [Header("프리팹")]
    public GameObject p_tail;

    [Space(20f)]
    [Header("레이어")]
    public LayerMask l_item;
    public LayerMask l_tail;

    /// <summary>Places the head at the start cell and spawns the initial tail segments.</summary>
    void Start()
    {
        head.position = startPos;
        // Keep the movement target in sync with the head (ResetGame does the same);
        // otherwise the first step jumps to whatever nextPos held in the inspector.
        nextPos = startPos;

        for (int i = 0; i < startTailCount - 1; i++)
        {
            AddTail();
        }
    }

    /// <summary>Accumulates the step timer, records the requested direction and steps when due.</summary>
    void Update()
    {
        // Advance the step timer; a higher moveSpeed shortens the real-time interval.
        moveTime += Time.deltaTime * moveSpeed;

        // Input: direction for the next step.
        if (Input.GetKeyDown(KeyCode.W))
        {
            inputDir = 0;
        }
        else if (Input.GetKeyDown(KeyCode.A))
        {
            inputDir = 1;
        }
        else if (Input.GetKeyDown(KeyCode.S))
        {
            inputDir = 2;
        }
        else if (Input.GetKeyDown(KeyCode.D))
        {
            inputDir = 3;
        }

        if (moveTime > MoveInterval)
        {
            moveTime = 0;
            Move();
        }
    }

    /// <summary>
    /// Advances the snake by one cell: applies the requested turn, eats an item on the
    /// target cell, shifts every tail segment forward and finally checks for self-collision.
    /// </summary>
    void Move()
    {
        // Directions on the same axis share parity (0/2 and 1/3), so a request is only
        // accepted when it switches axis. This blocks a 180-degree reversal.
        if (curDir % 2 != inputDir % 2)
        {
            curDir = inputDir;
        }

        switch (curDir)
        {
            case 0: nextPos += Vector3.forward; break;
            case 1: nextPos += Vector3.left; break;
            case 2: nextPos += Vector3.back; break;
            case 3: nextPos += Vector3.right; break;
        }

        TryEatItem(nextPos);

        // Shift the tail from the back so each segment takes its predecessor's cell.
        for (int i = tails.Count - 1; i > 0; i--)
        {
            tails[i].position = tails[i - 1].position;
        }
        if (tails.Count > 0)
        {
            tails[0].position = head.position;
        }

        // Move the head.
        head.position = nextPos;

        CheckCollideTail(nextPos);
    }

    /// <summary>Raycasts upward from y = 0 at the given cell and eats an item found on the item layer.</summary>
    /// <param name="_pos">Cell the head is about to enter; only x and z are used.</param>
    void TryEatItem(Vector3 _pos)
    {
        Vector3 origin = new Vector3(_pos.x, 0, _pos.z);
        if (Physics.Raycast(origin, Vector3.up, out RaycastHit hit, 1f, l_item))
        {
            EatItem(hit.transform.gameObject);
        }
    }

    /// <summary>Grows the snake by one segment, relocates the item and speeds the snake up.</summary>
    /// <param name="_item">The item that was hit (currently unused; the item is relocated via GameManager).</param>
    void EatItem(GameObject _item)
    {
        AddTail();
        GameManager.Instance.ChangeItemPos();
        moveSpeed += 0.1f;

        // TODO: add score
    }

    /// <summary>Raycasts downward from y = 1 at the given cell and kills the snake if a tail segment is hit.</summary>
    /// <param name="_pos">Cell the head has just entered; only x and z are used.</param>
    /// <returns><c>true</c> when the head collided with the tail and the game was reset.</returns>
    bool CheckCollideTail(Vector3 _pos)
    {
        Vector3 origin = new Vector3(_pos.x, 1, _pos.z);
        if (Physics.Raycast(origin, Vector3.down, 1f, l_tail))
        {
            Die();
            return true;
        }
        return false;
    }

    /// <summary>Handles the snake's death by resetting the game.</summary>
    void Die()
    {
        ResetGame();

        // TODO: subtract score
    }

    /// <summary>
    /// Resets the board through GameManager, returns the head to the start cell, restores the
    /// base speed and rebuilds the tail back to its initial length.
    /// </summary>
    void ResetGame()
    {
        GameManager.Instance.ResetGame();

        head.position = startPos;
        nextPos = startPos;
        moveSpeed = 1f;
        // Start the new round with a full step interval instead of a leftover partial one.
        moveTime = 0;

        // Destroy every segment except the first, which is reused.
        for (int i = tails.Count - 1; i > 0; i--)
        {
            Destroy(tails[i].gameObject);
            tails.RemoveAt(i);
        }

        if (tails.Count > 0)
        {
            // Park the surviving segment at y = -2 until the snake moves again.
            tails[0].position = new Vector3(0, -2, 0);
        }

        for (int i = 0; i < startTailCount - 1; i++)
        {
            AddTail();
        }
    }

    /// <summary>
    /// Instantiates one tail segment under this object, placed on the current last segment
    /// (or on the head when no segment exists yet), and appends it to <see cref="tails"/>.
    /// </summary>
    void AddTail()
    {
        Vector3 spawnPos = tails.Count > 0 ? tails[^1].position : head.position;

        GameObject segment = Instantiate(p_tail, transform);
        segment.transform.position = spawnPos;
        tails.Add(segment.transform);
    }

}

생각나는대로 쭉 썼다.

플레이어가 이동이 가능하고, 아이템을 먹었는지 확인한다.

아이템을 먹으면 꼬리가 하나 더 길어지고, 죽으면 게임을 리셋한다.

GameManager

using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Scene-level manager for the snake game: builds the checkerboard floor and the
/// surrounding walls, owns the single item, positions the camera for the current
/// board size and resets all of that between rounds.
/// </summary>
public class GameManager : Singleton<GameManager>
{
    [Header("프리팹")]
    public GameObject p_floor;
    public GameObject p_wall;
    public GameObject p_item;

    [Header("메추리알")]
    public Material m_wall;
    public Material m_floor1;
    public Material m_floor2;

    [Space(20f)]
    [Header("트랜스폼")]
    public Transform t_floors;
    public Transform t_walls;
    public Transform t_camera;

    [Space(20f)]
    [Header("게임 설정")]
    public int floorSize;
    public Vector3 cameraPos;   // Camera position, adjusted to match floorSize

    [Space(20f)]
    [Header("게임 오브젝트")]
    public Transform item;
    public PlayAgent player;

    // True once the first board has been built. The first round uses the floorSize set in
    // the inspector; every later reset rolls a new size before building.
    private bool _boardBuilt;

    /// <summary>Creates the item and builds the first board.</summary>
    void Start()
    {
        MakeItem();
        ResetGame();
    }

    /// <summary>
    /// Destroys the existing floor and wall tiles and rebuilds a floorSize x floorSize
    /// checkerboard surrounded by a one-tile wall ring (coordinates -1 and floorSize).
    /// </summary>
    void InitFloor()
    {
        // Remove the previous floor and walls.
        DestroyChildren(t_floors);
        DestroyChildren(t_walls);

        // Build the new floor and walls for the current size.
        for (int i = -1; i <= floorSize; i++)
        {
            for (int j = -1; j <= floorSize; j++)
            {
                bool isBorder = i == -1 || i == floorSize || j == -1 || j == floorSize;
                if (isBorder)
                {
                    GameObject wall = Instantiate(p_wall, t_walls);
                    wall.transform.position = new Vector3(i, 0, j);
                    wall.GetComponent<MeshRenderer>().material = m_wall;
                }
                else
                {
                    GameObject floor = Instantiate(p_floor, t_floors);
                    floor.transform.position = new Vector3(i, 0, j);
                    // Alternate the two floor materials to get a checkerboard.
                    floor.GetComponent<MeshRenderer>().material = (i + j) % 2 == 0 ? m_floor1 : m_floor2;
                }
            }
        }
    }

    /// <summary>Queues every direct child of <paramref name="parent"/> for destruction.</summary>
    void DestroyChildren(Transform parent)
    {
        for (int i = parent.childCount - 1; i >= 0; i--)
        {
            Destroy(parent.GetChild(i).gameObject);
        }
    }

    /// <summary>Instantiates the item prefab, stores its transform and places it on a random cell.</summary>
    public void MakeItem()
    {
        GameObject spawned = Instantiate(p_item);
        item = spawned.transform;
        ChangeItemPos();
    }

    /// <summary>Moves the item to a random cell in [0, floorSize) on both axes at height 0.3.</summary>
    public void ChangeItemPos()
    {
        item.position = new Vector3(Random.Range(0, floorSize), 0.3f, Random.Range(0, floorSize));
    }

    /// <summary>Positions the camera over the board at a height that scales with the board size.</summary>
    public void SetCameraPos()
    {
        // The x/z offset deliberately keeps integer division, as in the original.
        int center = floorSize / 2;
        // 3 / 2 in integer arithmetic is 1, which silently dropped the 1.5 factor;
        // use a float constant so the height really is sqrt(1.5 * size^2).
        float height = Mathf.Sqrt(1.5f * floorSize * floorSize);
        t_camera.position = new Vector3(center, height, center);
    }

    /// <summary>
    /// Starts a new round: picks the board size, hands the item and size to the player,
    /// rebuilds the board, relocates the item and repositions the camera.
    /// </summary>
    public void ResetGame()
    {
        // Roll the size BEFORE anything consumes it. Rolling it after the board was built
        // left floorSize out of sync with the visible board, so ChangeItemPos() calls made
        // during the round could place the item outside the walls.
        if (_boardBuilt)
        {
            floorSize = Random.Range(10, 21);
        }
        _boardBuilt = true;

        // The agent reference is optional (e.g. when the scene is driven by the plain Player script).
        if (player != null)
        {
            player.item = item;
            player.floorSize = floorSize;
        }

        InitFloor();
        ChangeItemPos();
        SetCameraPos();
    }
}

게임을 리셋하면서 카메라위치, 아이템위치, 벽생성 등을 담당한다.

처음엔 이렇게 만들었긴 하다만 맵의 사이즈를 고정하면서 대부분 메소드가 쓸모없어졌다.

위 스크립트를 그대로 둔 이유는, 학습에는 쓰이지 않고 게임이 제대로 실행되는지 테스트하는 용도로 만들었기 때문이다.

이 스크립트대로 실행하면 지렁이게임이 잘 실행된다.

이제 이 스크립트를 학습시킬 수 있도록 수정해주면 된다.

2. 스크립트를 학습이 가능하도록 수정하자

위 스크립트에서 학습시킬 수 있도록 수정했다.

모든 정보는 유니티 본진에 찾아가면 다 있다.

https://docs.unity3d.com/Packages/com.unity.ml-agents@1.0/api/Unity.MLAgents.Agent.html

Class Agent | ML Agents | 1.0.8

Class Agent An agent is an actor that can observe its environment, decide on the best course of action using those observations, and execute those actions within the environment. Assembly : solution.dll Syntax public class Agent : MonoBehaviour, ISerializa

docs.unity3d.com

스크립트

PlayAgent

Player에서 수정된 부분이다.

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Sensors;
using Unity.MLAgents.Actuators;
using UnityEngine.SceneManagement;


/// <summary>
/// ML-Agents version of the snake player. It combines the movement logic of the plain
/// player script with a per-agent copy of the board management (floor, walls, item) so that
/// several training areas can run side by side, each offset by <see cref="agentStartPos"/>.
/// One discrete action branch (0 = W/forward, 1 = A/left, 2 = S/back, 3 = D/right) selects
/// the requested direction.
/// </summary>
public class PlayAgent : Agent
{
    // Speed-scaled time that must accumulate before the snake advances one cell.
    private const float MoveInterval = 0.5f;

    // Number of tail segments kept (and re-stacked on the start cell) between episodes.
    private const int BaseTailCount = 3;

    [Header("이동")]
    public Vector3 startPos;
    public Vector3 nextPos;    // Cell the head will occupy after the next step

    // w-0, a-1, s-2, d-3
    public int curDir = 0;      // Direction currently being travelled (fixed during a step)
    public int inputDir = 0;     // Direction requested by the latest action
    private int[] curDirection = { 0, 0, 0, 0 };   // Per-direction vote counters for the optional majority-vote input

    public float moveTime = 0;
    public float moveSpeed = 1;

    [Space(20f)]
    [Header("지렁이")]
    public Transform head;
    public List<Transform> tails;
    public int startTailCount = 3;

    [Space(20f)]
    [Header("프리팹")]
    public GameObject p_tail;

    [Space(20f)]
    [Header("레이어")]
    public LayerMask l_item;
    public LayerMask l_tail;

    [Space(20f)]
    [Header("기타")]
    public float distance;   // Head-to-item distance measured just before the latest step

    /// <summary>
    /// Advances the snake by one cell once the step timer has elapsed: applies the requested
    /// turn, shifts the tail, moves the head, hands out the shaping rewards, then checks for
    /// an eaten item and for a collision with the tail.
    /// </summary>
    void Move()
    {
        if (moveTime < MoveInterval) { return; }
        moveTime = 0;

        distance = Vector3.Distance(head.position, item.position);

        // Alternative input mode (kept for reference): use the direction that was requested
        // most often since the last step instead of the most recent one.
        // int curMax = 0;
        // for (int i = 0; i < curDirection.Length; i++)
        // {
        //     if (curDirection[i] > curMax)
        //     {
        //         curMax = curDirection[i];
        //         inputDir = i;
        //     }
        //     curDirection[i] = 0;
        // }

        // Directions on the same axis share parity (0/2 and 1/3), so a request is only
        // accepted when it switches axis. This blocks a 180-degree reversal.
        if (curDir % 2 != inputDir % 2)
        {
            curDir = inputDir;
        }
        else if (curDir != inputDir)
        {
            // Same axis but a different direction means the agent tried to reverse.
            // Penalize the attempt (the original condition penalized going straight instead).
            AddReward(-0.01f);
        }

        switch (curDir)
        {
            case 0: nextPos += Vector3.forward; break;
            case 1: nextPos += Vector3.left; break;
            case 2: nextPos += Vector3.back; break;
            case 3: nextPos += Vector3.right; break;
        }

        // Shift the tail from the back so each segment takes its predecessor's cell.
        for (int i = tails.Count - 1; i > 0; i--)
        {
            tails[i].position = tails[i - 1].position;
        }
        if (tails.Count > 0)
        {
            tails[0].position = head.position;
        }

        // Move the head.
        head.position = nextPos;

        // Shaping reward: small bonus for getting closer to the item, larger penalty otherwise.
        if (distance > Vector3.Distance(head.position, item.position))
        {
            AddReward(0.0001f);
        }
        else
        {
            AddReward(-0.001f);
        }

        TryEatItem(nextPos);

        CheckCollideTail(nextPos);
    }

    /// <summary>Raycasts downward from one unit above the area's base height and eats an item found on the item layer.</summary>
    /// <param name="_pos">Cell the head has just entered; only x and z are used.</param>
    void TryEatItem(Vector3 _pos)
    {
        Vector3 origin = new Vector3(_pos.x, agentStartPos.y + 1, _pos.z);
        if (Physics.Raycast(origin, Vector3.down, out RaycastHit hit, 1f, l_item))
        {
            EatItem(hit.transform.gameObject);
        }
    }

    /// <summary>Relocates the item, grows the snake, speeds it up and rewards the agent with +1.</summary>
    /// <param name="_item">The item that was hit (currently unused; the item is relocated in place).</param>
    void EatItem(GameObject _item)
    {
        ChangeItemPos();
        AddTail();
        moveSpeed += 0.1f;

        AddReward(1f);
        print(GetCumulativeReward());
    }

    /// <summary>Raycasts downward at the given cell and ends the episode with reward -1 if a tail segment is hit.</summary>
    /// <param name="_pos">Cell the head has just entered; only x and z are used.</param>
    void CheckCollideTail(Vector3 _pos)
    {
        Vector3 origin = new Vector3(_pos.x, agentStartPos.y + 1, _pos.z);
        if (Physics.Raycast(origin, Vector3.down, 1f, l_tail))
        {
            SetReward(-1f);

            Die();
        }
    }

    /// <summary>Logs the cumulative reward and ends the current episode.</summary>
    void Die()
    {
        print(GetCumulativeReward());
        EndEpisode();
    }


    /// <summary>
    /// Restores the start-of-episode state: stacks the base tail segments on the start cell,
    /// destroys any extra segments, resets head, direction, timer and speed, and relocates the item.
    /// </summary>
    void ResetGame()
    {
        // Guard against a tail list shorter than the base count (e.g. not filled in the inspector).
        int keep = Mathf.Min(BaseTailCount, tails.Count);
        for (int i = 0; i < keep; i++)
        {
            tails[i].position = startPos;
        }
        for (int i = tails.Count - 1; i >= keep; i--)
        {
            Destroy(tails[i].gameObject);
            tails.RemoveAt(i);
        }

        head.position = startPos;
        nextPos = startPos;
        curDir = 0;

        moveTime = 0;
        moveSpeed = 1;

        ChangeItemPos();
    }

    /// <summary>
    /// Instantiates one tail segment under this object, placed on the current last segment
    /// (or on the head when no segment exists yet), and appends it to <see cref="tails"/>.
    /// </summary>
    void AddTail()
    {
        Vector3 spawnPos = tails.Count > 0 ? tails[^1].position : head.position;

        GameObject segment = Instantiate(p_tail, transform);
        segment.transform.position = spawnPos;
        tails.Add(segment.transform);
    }


    #region ML-Agents

    /// <summary>
    /// One-time setup: resolves the wall/floor containers from the parent's children
    /// (index 1 and 2), takes this object's first child position as the area origin, derives
    /// the start cell from it, then creates the item, resets the snake and builds the board.
    /// </summary>
    public override void Initialize()
    {
        t_walls = transform.parent.GetChild(1).transform;
        t_floors = transform.parent.GetChild(2).transform;
        agentStartPos = transform.GetChild(0).position;

        startPos = new Vector3(5, 0.25f, 5) + agentStartPos;
        nextPos = new Vector3(5, 0.25f, 5) + agentStartPos;    // Cell the head moves to next

        MakeItem();
        ResetGame();
        InitFloor();
    }

    /// <summary>Resets the snake and the item at the start of every episode.</summary>
    public override void OnEpisodeBegin()
    {
        ResetGame();
    }

    /// <summary>
    /// Applies the received action: advances the step timer, stores discrete branch 0 as the
    /// requested direction and tries to step.
    /// </summary>
    /// <param name="actionBuffers">Action buffers; discrete branch 0 holds the direction (0-3).</param>
    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        moveTime += Time.deltaTime * moveSpeed;

        var action = actionBuffers.DiscreteActions[0];

        inputDir = action;
        // CheckDirection(action);
        Move();
    }

    /// <summary>Counts a vote for <paramref name="direction"/> and tries to step (majority-vote input mode).</summary>
    private void CheckDirection(int direction)
    {
        curDirection[direction]++;
        Move();
    }

    /// <summary>
    /// Writes the 7 observations the policy sees: scaled head x/z, scaled tail length,
    /// scaled head-to-item offset on x/z, step-timer progress and the current direction.
    /// </summary>
    /// <param name="sensor">Vector sensor to append the observations to.</param>
    public override void CollectObservations(VectorSensor sensor)
    {
        sensor.AddObservation(head.position.x / 10f);
        sensor.AddObservation(head.position.z / 10f);
        // sensor.AddObservation(tails[^1].position.x / 10f);
        // sensor.AddObservation(tails[^1].position.z / 10f);
        // Float division: the integer form (Count / 100) was always 0 for fewer than 100 segments.
        sensor.AddObservation(tails.Count / 100f);
        // sensor.AddObservation(floorSize / 30f);

        sensor.AddObservation((head.position.x - item.position.x) / 10f);
        sensor.AddObservation((head.position.z - item.position.z) / 10f);
        // sensor.AddObservation(item.position.x / 10f);
        // sensor.AddObservation(item.position.z / 10f);
        sensor.AddObservation(moveTime * 2);
        // Float division: the integer form (curDir / 3) collapsed directions 0-2 to 0.
        sensor.AddObservation(curDir / 3f);
    }

    /// <summary>
    /// Manual control for testing: maps the held W/A/S/D key to discrete branch 0.
    /// Only fills the action buffer; the timer and the step itself are handled by
    /// <see cref="OnActionReceived"/>, which receives these actions. Doing both here as
    /// well advanced the snake twice per decision.
    /// </summary>
    /// <param name="actionsOut">Action buffers to fill.</param>
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var action = actionsOut.DiscreteActions;

        // With no key held, keep going straight instead of defaulting to 0 (forward/W).
        action[0] = curDir;

        if (Input.GetKey(KeyCode.W))
        {
            action[0] = 0;
        }
        else if (Input.GetKey(KeyCode.A))
        {
            action[0] = 1;
        }
        else if (Input.GetKey(KeyCode.S))
        {
            action[0] = 2;
        }
        else if (Input.GetKey(KeyCode.D))
        {
            action[0] = 3;
        }
    }

    #endregion


    #region GameManager
    [Header("프리팹")]
    public GameObject p_floor;
    public GameObject p_wall;
    public GameObject p_item;

    [Header("메추리알")]
    public Material m_wall;
    public Material m_floor1;
    public Material m_floor2;

    [Space(20f)]
    [Header("트랜스폼")]
    public Transform t_floors;
    public Transform t_walls;
    public Vector3 agentStartPos;   // World-space origin of this agent's board

    [Space(20f)]
    [Header("게임 설정")]
    public int floorSize;

    [Space(20f)]
    [Header("게임 오브젝트")]
    public Transform item;


    /// <summary>
    /// Destroys the existing floor and wall tiles and rebuilds a floorSize x floorSize
    /// checkerboard surrounded by a one-tile wall ring, offset by <see cref="agentStartPos"/>.
    /// </summary>
    void InitFloor()
    {
        // Remove the previous floor and walls.
        for (int i = t_floors.childCount - 1; i >= 0; i--)
        {
            Destroy(t_floors.GetChild(i).gameObject);
        }
        for (int i = t_walls.childCount - 1; i >= 0; i--)
        {
            Destroy(t_walls.GetChild(i).gameObject);
        }

        // Build the new floor and walls for the current size.
        for (int i = -1; i <= floorSize; i++)
        {
            for (int j = -1; j <= floorSize; j++)
            {
                bool isBorder = i == -1 || i == floorSize || j == -1 || j == floorSize;
                if (isBorder)
                {
                    GameObject wall = Instantiate(p_wall, t_walls);
                    wall.transform.position = new Vector3(i, 0, j) + agentStartPos;
                    wall.GetComponent<MeshRenderer>().material = m_wall;
                }
                else
                {
                    GameObject floor = Instantiate(p_floor, t_floors);
                    floor.transform.position = new Vector3(i, 0, j) + agentStartPos;
                    // Alternate the two floor materials to get a checkerboard.
                    floor.GetComponent<MeshRenderer>().material = (i + j) % 2 == 0 ? m_floor1 : m_floor2;
                }
            }
        }
    }

    /// <summary>Instantiates the item prefab, stores its transform and places it on a random cell.</summary>
    public void MakeItem()
    {
        GameObject spawned = Instantiate(p_item);
        item = spawned.transform;
        ChangeItemPos();
    }

    /// <summary>
    /// Moves the item to a random cell of this agent's board (height 0.3 above the area origin),
    /// re-rolling while the cell is the one occupied by the head.
    /// </summary>
    public void ChangeItemPos()
    {
        Vector3 candidate;
        do
        {
            candidate = new Vector3(Random.Range(0, floorSize), 0.3f, Random.Range(0, floorSize)) + agentStartPos;
            // floorSize <= 1 leaves no alternative cell, so do not loop forever in that case.
        } while (floorSize > 1 && IsSameCell(candidate, head.position));

        item.position = candidate;
    }

    /// <summary>
    /// Returns whether two positions fall on the same grid cell, comparing x and z only.
    /// Heights are ignored because the item and the head sit at different y values.
    /// </summary>
    private static bool IsSameCell(Vector3 a, Vector3 b)
    {
        // Cells are one unit apart, so half a unit is a safe tolerance.
        return Mathf.Abs(a.x - b.x) < 0.5f && Mathf.Abs(a.z - b.z) < 0.5f;
    }

    #endregion

}

Player에서 위 부분을 추가&수정했고 뒷부분엔 GameManager를 그대로 복붙했다.

GameManager를 그대로 복붙한 이유는 코드 자체는 짧기때문에 오히려 관리가 편해서 하나로 묶어버렸다.

주석 부분은 2개다.

ㅡㅡㅡㅡㅡㅡㅡㅡㅡ주석 부분ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ

Move

이동은 2가지로 구현했는데 비슷한 것 같다.

1. 이동 전 마지막으로 입력한 값

2. 이동까지 가장 많이 입력이 들어온 값

학습에 다른 영향을 줄까 싶었는데, 둘다 똑같았다.

Move관련된 부분의 주석만 제거하면 된다.

CollectObservations()

sensor에 어떤 값들을 주려고 시도했는지 스스로 확인할 때가 있을 것 같아서 지우지않고 주석처리했다.

그리고

sensor.AddObservation(curDir / 3);

이걸 쓰는 게 학습 성과가 더 좋았다. (단, curDir과 3이 모두 int라서 정수 나눗셈이 되어 실제 값은 0 또는 1만 나온다.)

ㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡㅡ

위의 메소드들에 학습관련된 것만 보자면

AddReward(점수)

학습을 잘하거나 못했을 때 점수를 부여한다.

EndEpisode()

하나의 에피소드를 끝낸다. (죽은경우)

OnEpisodeBegin() {~~}

각 에피소드를 시작할 때 호출됨 ( ≒ Start() )

OnActionReceived(ActionBuffers actionBuffers) { ~~ }

에이전트의 동작을 지정하는 부분. ( ≒ Update() )

actionBuffers.DiscreteActions[0]

Behavior Parameters에 설정한 Discrete Branch 중 0번 브랜치에서 선택된 행동 값(이 게임에서는 0~3)을 가져온다.

CollectObservations(VectorSensor sensor) { ~~ }

여기서 sensor의 정보들을 통해 목표를 찾아감

sensor.AddObservation()

학습에 쓰이는 정보를 입력. 이 개수만큼 Space Size를 설정해야 함

print(GetCumulativeReward());

현재 에피소드의 Reward를 return함

더 필요한 부분이 있다면 위에 적어둔 유니티 본진으로 찾아가면 된다.

유니티에서 설정

Player의 스크립트를 수정해서 만든 PlayAgent를 넣어준 오브젝트에 컴포넌트를 더 추가해줘야 한다.

위처럼 3개의 컴포넌트를 더 추가했는데,

그중 Behavior Parameters 스크립트를 설정해줘야 한다.

Vector Observation

관측할 수치에 관련된 설정
Space Size: CollectObservations()에서 sensor.AddObservation를 사용한 개수를 입력해야 한다.
몇개의 정보가 주어졌는지 적어야 한다는 뜻
개수가 틀리면 Console창에서 틀렸다고 알려준다

Actions

Agent의 행동과 관련된 설정
Continuous Actions: 연속적인(float) 값으로 받는 행동의 개수. 쉽게 말해 회전이 가능한 방향당 1씩이다.
x,y축으로 회전이 가능하다면 2
이 게임에서 회전은 없으니 0을 넣음
Discrete Branches: 입력으로 들어가는 이동방향의 그룹
branch 0 size에 4를 넣었는데 현재 이동방향이 상, 하, 좌, 우 라서 4를 넣었다
(만약 대각선이동이 가능하면 Discrete Branches에 2, branch size 2를 넣으면 된다)
이부분은 쉽게 Input.GetAxis인지 GetButton인지로 구분한다고 보면 된다.

Model

학습이 완료된 모델을 넣어서 적용
학습시키려면 아무것도 넣지 않으면 된다.

추가로 PlayAgent 스크립트는 Agent를 상속받은 상태인데,

Max Step을 적당한 값으로 수정해줘야 한다. 모르겠다면 5000하면 된다.

학습시키기

cd C:\Unity\ml-agents-release_20
ml-agents가 깔려있는 위치로 이동
conda activate 이름
가상환경 설정(본인은 안함)
mlagents-learn config/ppo/basic.yaml --run-id=myProject
mlagents-learn yaml의 위치 --run-id=결과를 저장할 폴더이름

학습이 완료되면 ml-agents가 깔려있는 폴더의 results에 위에서 --run-id로 설정한 폴더 이름으로 들어가면 onnx파일로 모델이 저장되어있다.

그 모델을 Behavior Parameters의 model에 넣으면 확인가능하다.

1차 학습시키기 - 아이템을 먹으러 이동

꼬리가 길어지는 부분을 주석처리하고 실행했다.

기본 꼬리는 3개라서 꼬리는 신경쓰지않고 아이템을 먹으러 이동하면 된다.

하지만 뒤로 이동은 불가능하고, 시도하면 점수를 깎는다.

이 과정에서 적당한 하이퍼파라미터를 찾을 것이다.

추가로 item의 위치를 알려주는 것보다 거리를 알려주는게 오히려 학습에 좋았다.

학습이 잘되도록 item과 가까워지면 +를, 멀어지면 -를 줬는데

거리를 알려주면 아이템을 먹으러 이동했지만,

위치를 알려주면 아이템 근처를 서성이면서 최대한 죽지않고 버티려고 했다.

        sensor.AddObservation((head.position.x - item.position.x) / 10f);
        sensor.AddObservation((head.position.z - item.position.z) / 10f);
        // sensor.AddObservation(item.position.x / 10f);
        // sensor.AddObservation(item.position.z / 10f);

학습시키다가 캡쳐했다.

2차 학습시키기 - 꼬리가 점점 길어짐

위에서 적당한 하이퍼파라미터를 찾았으니 꼬리가 길어지는 방법으로 다시 학습 시켰다.

추가로 1개를 먹을 때마다 속도가 기본 속도의 0.1배씩 빨라진다. (moveSpeed += 0.1f)

컴퓨터의 한계로 80만 step까지밖에 학습시키지 못했다.ㅜㅜ

느리지만 학습이 천천히 되고있다.

80만step까지 했을때 10개정도는 먹고 죽는다.

3차 학습시키기 - 추후 완...ㅜㅜ

죽을 때 마다 맵의 크기가 바뀌도록 만들었는데, 이부분을 넣으면 학습 시간이 더 길어지게 된다.

컴퓨터가 위 단계에서도 힘들게 학습시켜서 이부분은 나중에 컴퓨터 업그레이드를 하면 이어서 해야한다. ㅜㅜ

에러가 났던 부분

1. 설치하면서 에러

설치하면서 에러가 가장 많았는데, 그 중 하나는 이거

https://portable-paper.tistory.com/entry/mlagent-%EC%84%A4%EC%B9%98-%EC%97%90%EB%9F%AC

mlagent 설치 에러

https://www.youtube.com/watch?v=RsCjC4yDpzE&t=338s 유니티 공식 유튜브 위 링크대로 따라했는데 mlagents-learn --help에서 막혔다. mlagensts-learn을 찾을수없다고 뜨고, 나중에는 TypeError: Descriptors cannot not be created di

portable-paper.tistory.com

이외에도 에러가 많았는데 검색하니 어렵지않게 해결했다.

이건 거의 하루를 날려먹었다.

추가로 이런 에러도 있었는데 골때렸다.ㅜㅜ

ObjectDisposedException: SerializedProperty tails.Array.data[18] has disappeared!

ObjectDisposedException: SerializedProperty tails.Array.data[18] has disappeared!

라는 에러

그리고 type is not a supported pptr value

유니티 버전 에러라고 하길래 버전을 바꾸니 해결됐다.

유니티가 아니더라도 python등이 버전에러인 경우가 가장 골치아픈 것 같다.

2. 갑자기 떨어진 Mean Reward

[INFO] Basic. Step: 70000. Time Elapsed: 1727.381 s. Mean Reward: -0.027. Std of Reward: 0.025. Training.
[INFO] Basic. Step: 72000. Time Elapsed: 1743.583 s. Mean Reward: 0.013. Std of Reward: 0.627. Training.

원래 -1~1로 나왔던 값이 뭘 잘못 건드렸나... -0.01~0.01로 나온다.

심지어 저 사이에서 변화가 없다. 더 떨어지지도 않고 더 오르지도 않았다.

학습이 전혀 안되고있다는 뜻으로 밖에 이해되지가 않았다.

찾다보니 코드문제가 있어서 코드를 수정했다.

Reward를 설정하는 부분에 문제가 있었던 것

3. 학습이 안된다.

머리로는 된다고 생각해서 만들었는데... 학습을 시키면 3가지경우 중 하나였다.

제자리에서 뱅뱅돈다
이건 AddReward(-0.001f)를 매프레임마다 적용시켜서 해결했다.
매 에피소드를 바로 죽는 방법을 선택한다
위 1번에서 0.01f가 크기때문에 더 낮춰서 해결했다. -> 0.001f
아무리 학습을 반복시켜도 학습이 안된다.
이게 가장 골때리는 문제... 50만번 반복을 해도 학습이 안됐다.
이건 학습의 문제가 아니라고 판단했고, 아래 2가지중 하나로 해결가능하다고 생각했다.
코드를 수정 or yaml수정

코드는 수정할 부분이 없었고, yaml을 계속 수정하면서 학습 강도를 높여서 학습시켰다.

그랬더니 학습이 어느정도 되기 시작했지만...

100만번정도 학습시키니 컴퓨터가 멈춰버린다. ㅜㅜ

처음 만들땐 예제에 쓰인 yaml을 그대로 가져와서 쓰면 될꺼라고 생각했다.

하지만 예제는 Action이 Continuous Actions였지만 이 게임은 Discrete Branches의 값으로 이뤄진다.

신기하게 이부분때문에 하이퍼파라미터가 완전 달랐던거였고 예상보다 길어졌다.

추가 팁

1. 하이퍼파라미터 모드

vis_encode_type를 Simple로 설정하면 전혀 학습이 안되고 다른 모드로 하니 확실하게 학습이 잘됐다.

normalize는 안 켜는 게 좋은 듯했으나 true로 설정한 게 오히려 더 좋았다.

나머지는 git을 찾아보며 적당히 수정했다.

vis_encode_type에 대해선 유니티 예제는 거의 Simple인데, 예제와 내가 만든 지렁이게임은 Input의 타입이 전혀 다르기때문에 Simple로 학습이 안 되는것 같다.

2. AddReward

Item에게 가까워지면 +0.0001f를 주고, 멀어지면 -0.001f를 줬다. 보상이 10배차이지만, 비슷하게 줘버리면 제자리에 돌아서 차이를 크게 했다.

위 방법처럼 목적을 달성한 경우에만 보상을 주는 것이 아니라, 목적을 달성하기 위해 움직이는 경우에 미세한 보상을 주면 학습이 엄청 잘된다.

찾아본 곳들

https://docs.unity3d.com/Packages/com.unity.ml-agents@1.0/api/Unity.MLAgents.Agent.html

Class Agent | ML Agents | 1.0.8

docs.unity3d.com

https://velog.io/@fran/Unity-ML-Agent-%EB%82%B4%EB%B6%80-%EC%9A%94%EC%86%8C-%EB%B0%8F-%ED%95%A8%EC%88%98

Unity ML-Agent 내부 요소 및 함수

에이전트의 학습에 관련된 파라미터학습의 결과로 나올 모델의 이름환경의 수치적 관측 관련 설정Space size를 통해 관측의 크기 설정 (ex. x, y, z좌표라면 3)Stacked Vector를 통해 관측의 누적 횟수 결

velog.io

https://ikaros79.tistory.com/entry/04-%EA%B8%B0%EB%B3%B8-%EA%B0%95%ED%99%94%ED%95%99%EC%8A%B5Reinforcement-Learning-%EC%98%88%EC%A0%9C-%EB%A7%8C%EB%93%A4%EA%B8%B0

04-기본 강화학습(Reinforcement Learning) 예제 만들기

본 내용은 아래의 링크의 내용을 따라하는 내용임. github.com/Unity-Technologies/ml-agents/blob/release_12_docs/docs/Learning-Environment-Create-New.md Unity-Technologies/ml-agents Unity Machine Learning Agents Toolkit. Contribute to Unity

ikaros79.tistory.com

https://ojui.tistory.com/9#recentEntries

02. ML-Agents - position,rigidbody 관측

🔷 실습2 - Agent가 Target을 향해 이동 - position, rigidbody 값 관측 - 연속 : actions.ContinuousActions 🔶 기본 세팅 - 폴더 정리 https://assetstore.unity.com/packages/3d/characters/free-mummy-monster-134212 Free Mummy Monster | 3D

ojui.tistory.com

외에도 많지만 잘 정리된곳만 적어뒀다.

+ yaml

https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Training-Configuration-File.md

'유니티_일기 > 지렁이게임_AI학습' 카테고리의 다른 글

mlagent 설치 에러 (0)	2023.10.19

portable_paper

지렁이 게임 AI 학습시키기

목표

1. 지렁이게임 만들기

스크립트

2. 스크립트를 학습이 가능하도록 수정하자

유니티에서 설정

학습시키기

에러가 났던 부분

추가 팁

찾아본 곳들

'유니티_일기 > 지렁이게임_AI학습' 카테고리의 다른 글

댓글

티스토리툴바

지렁이 게임 AI 학습시키기

목표

1. 지렁이게임 만들기

스크립트

2. 스크립트를 학습이 가능하도록 수정하자

유니티에서 설정

학습시키기

에러가 났던 부분

추가 팁

찾아본 곳들

'유니티_일기 > 지렁이게임_AI학습' 카테고리의 다른 글

관련글

댓글

티스토리툴바