Creating the Gym Environment¶

In [1]:
import gym
import random
import ale_py
import pprint

Available Environments¶

In [15]:
# Gather the ids of every environment registered with gym and show them as a list.
all_envs = gym.envs.registry.keys()
list(all_envs)
Out[15]:
['ALE/Adventure-v5',
 'ALE/Adventure-ram-v5',
 'ALE/AirRaid-v5',
 'ALE/AirRaid-ram-v5',
 'ALE/Alien-v5',
 'ALE/Alien-ram-v5',
 'ALE/Amidar-v5',
 'ALE/Amidar-ram-v5',
 'ALE/Assault-v5',
 'ALE/Assault-ram-v5',
 'ALE/Asterix-v5',
 'ALE/Asterix-ram-v5',
 'ALE/Asteroids-v5',
 'ALE/Asteroids-ram-v5',
 'ALE/Atlantis-v5',
 'ALE/Atlantis-ram-v5',
 'ALE/Atlantis2-v5',
 'ALE/Atlantis2-ram-v5',
 'ALE/Backgammon-v5',
 'ALE/Backgammon-ram-v5',
 'ALE/BankHeist-v5',
 'ALE/BankHeist-ram-v5',
 'ALE/BasicMath-v5',
 'ALE/BasicMath-ram-v5',
 'ALE/BattleZone-v5',
 'ALE/BattleZone-ram-v5',
 'ALE/BeamRider-v5',
 'ALE/BeamRider-ram-v5',
 'ALE/Berzerk-v5',
 'ALE/Berzerk-ram-v5',
 'ALE/Blackjack-v5',
 'ALE/Blackjack-ram-v5',
 'ALE/Bowling-v5',
 'ALE/Bowling-ram-v5',
 'ALE/Boxing-v5',
 'ALE/Boxing-ram-v5',
 'ALE/Breakout-v5',
 'ALE/Breakout-ram-v5',
 'ALE/Carnival-v5',
 'ALE/Carnival-ram-v5',
 'ALE/Casino-v5',
 'ALE/Casino-ram-v5',
 'ALE/Centipede-v5',
 'ALE/Centipede-ram-v5',
 'ALE/ChopperCommand-v5',
 'ALE/ChopperCommand-ram-v5',
 'ALE/CrazyClimber-v5',
 'ALE/CrazyClimber-ram-v5',
 'ALE/Crossbow-v5',
 'ALE/Crossbow-ram-v5',
 'ALE/Darkchambers-v5',
 'ALE/Darkchambers-ram-v5',
 'ALE/Defender-v5',
 'ALE/Defender-ram-v5',
 'ALE/DemonAttack-v5',
 'ALE/DemonAttack-ram-v5',
 'ALE/DonkeyKong-v5',
 'ALE/DonkeyKong-ram-v5',
 'ALE/DoubleDunk-v5',
 'ALE/DoubleDunk-ram-v5',
 'ALE/Earthworld-v5',
 'ALE/Earthworld-ram-v5',
 'ALE/ElevatorAction-v5',
 'ALE/ElevatorAction-ram-v5',
 'ALE/Enduro-v5',
 'ALE/Enduro-ram-v5',
 'ALE/Entombed-v5',
 'ALE/Entombed-ram-v5',
 'ALE/Et-v5',
 'ALE/Et-ram-v5',
 'ALE/FishingDerby-v5',
 'ALE/FishingDerby-ram-v5',
 'ALE/FlagCapture-v5',
 'ALE/FlagCapture-ram-v5',
 'ALE/Freeway-v5',
 'ALE/Freeway-ram-v5',
 'ALE/Frogger-v5',
 'ALE/Frogger-ram-v5',
 'ALE/Frostbite-v5',
 'ALE/Frostbite-ram-v5',
 'ALE/Galaxian-v5',
 'ALE/Galaxian-ram-v5',
 'ALE/Gopher-v5',
 'ALE/Gopher-ram-v5',
 'ALE/Gravitar-v5',
 'ALE/Gravitar-ram-v5',
 'ALE/Hangman-v5',
 'ALE/Hangman-ram-v5',
 'ALE/HauntedHouse-v5',
 'ALE/HauntedHouse-ram-v5',
 'ALE/Hero-v5',
 'ALE/Hero-ram-v5',
 'ALE/HumanCannonball-v5',
 'ALE/HumanCannonball-ram-v5',
 'ALE/IceHockey-v5',
 'ALE/IceHockey-ram-v5',
 'ALE/Jamesbond-v5',
 'ALE/Jamesbond-ram-v5',
 'ALE/JourneyEscape-v5',
 'ALE/JourneyEscape-ram-v5',
 'ALE/Kaboom-v5',
 'ALE/Kaboom-ram-v5',
 'ALE/Kangaroo-v5',
 'ALE/Kangaroo-ram-v5',
 'ALE/KeystoneKapers-v5',
 'ALE/KeystoneKapers-ram-v5',
 'ALE/KingKong-v5',
 'ALE/KingKong-ram-v5',
 'ALE/Klax-v5',
 'ALE/Klax-ram-v5',
 'ALE/Koolaid-v5',
 'ALE/Koolaid-ram-v5',
 'ALE/Krull-v5',
 'ALE/Krull-ram-v5',
 'ALE/KungFuMaster-v5',
 'ALE/KungFuMaster-ram-v5',
 'ALE/LaserGates-v5',
 'ALE/LaserGates-ram-v5',
 'ALE/LostLuggage-v5',
 'ALE/LostLuggage-ram-v5',
 'ALE/MarioBros-v5',
 'ALE/MarioBros-ram-v5',
 'ALE/MiniatureGolf-v5',
 'ALE/MiniatureGolf-ram-v5',
 'ALE/MontezumaRevenge-v5',
 'ALE/MontezumaRevenge-ram-v5',
 'ALE/MrDo-v5',
 'ALE/MrDo-ram-v5',
 'ALE/MsPacman-v5',
 'ALE/MsPacman-ram-v5',
 'ALE/NameThisGame-v5',
 'ALE/NameThisGame-ram-v5',
 'ALE/Othello-v5',
 'ALE/Othello-ram-v5',
 'ALE/Pacman-v5',
 'ALE/Pacman-ram-v5',
 'ALE/Phoenix-v5',
 'ALE/Phoenix-ram-v5',
 'ALE/Pitfall-v5',
 'ALE/Pitfall-ram-v5',
 'ALE/Pitfall2-v5',
 'ALE/Pitfall2-ram-v5',
 'ALE/Pong-v5',
 'ALE/Pong-ram-v5',
 'ALE/Pooyan-v5',
 'ALE/Pooyan-ram-v5',
 'ALE/PrivateEye-v5',
 'ALE/PrivateEye-ram-v5',
 'ALE/Qbert-v5',
 'ALE/Qbert-ram-v5',
 'ALE/Riverraid-v5',
 'ALE/Riverraid-ram-v5',
 'ALE/RoadRunner-v5',
 'ALE/RoadRunner-ram-v5',
 'ALE/Robotank-v5',
 'ALE/Robotank-ram-v5',
 'ALE/Seaquest-v5',
 'ALE/Seaquest-ram-v5',
 'ALE/SirLancelot-v5',
 'ALE/SirLancelot-ram-v5',
 'ALE/Skiing-v5',
 'ALE/Skiing-ram-v5',
 'ALE/Solaris-v5',
 'ALE/Solaris-ram-v5',
 'ALE/SpaceInvaders-v5',
 'ALE/SpaceInvaders-ram-v5',
 'ALE/SpaceWar-v5',
 'ALE/SpaceWar-ram-v5',
 'ALE/StarGunner-v5',
 'ALE/StarGunner-ram-v5',
 'ALE/Superman-v5',
 'ALE/Superman-ram-v5',
 'ALE/Surround-v5',
 'ALE/Surround-ram-v5',
 'ALE/Tennis-v5',
 'ALE/Tennis-ram-v5',
 'ALE/Tetris-v5',
 'ALE/Tetris-ram-v5',
 'ALE/TicTacToe3D-v5',
 'ALE/TicTacToe3D-ram-v5',
 'ALE/TimePilot-v5',
 'ALE/TimePilot-ram-v5',
 'ALE/Trondead-v5',
 'ALE/Trondead-ram-v5',
 'ALE/Turmoil-v5',
 'ALE/Turmoil-ram-v5',
 'ALE/Tutankham-v5',
 'ALE/Tutankham-ram-v5',
 'ALE/UpNDown-v5',
 'ALE/UpNDown-ram-v5',
 'ALE/Venture-v5',
 'ALE/Venture-ram-v5',
 'ALE/VideoCheckers-v5',
 'ALE/VideoCheckers-ram-v5',
 'ALE/VideoChess-v5',
 'ALE/VideoChess-ram-v5',
 'ALE/VideoCube-v5',
 'ALE/VideoCube-ram-v5',
 'ALE/VideoPinball-v5',
 'ALE/VideoPinball-ram-v5',
 'ALE/WizardOfWor-v5',
 'ALE/WizardOfWor-ram-v5',
 'ALE/WordZapper-v5',
 'ALE/WordZapper-ram-v5',
 'ALE/YarsRevenge-v5',
 'ALE/YarsRevenge-ram-v5',
 'ALE/Zaxxon-v5',
 'ALE/Zaxxon-ram-v5',
 'Adventure-v0',
 'AdventureDeterministic-v0',
 'AdventureNoFrameskip-v0',
 'Adventure-v4',
 'AdventureDeterministic-v4',
 'AdventureNoFrameskip-v4',
 'Adventure-ram-v0',
 'Adventure-ramDeterministic-v0',
 'Adventure-ramNoFrameskip-v0',
 'Adventure-ram-v4',
 'Adventure-ramDeterministic-v4',
 'Adventure-ramNoFrameskip-v4',
 'AirRaid-v0',
 'AirRaidDeterministic-v0',
 'AirRaidNoFrameskip-v0',
 'AirRaid-v4',
 'AirRaidDeterministic-v4',
 'AirRaidNoFrameskip-v4',
 'AirRaid-ram-v0',
 'AirRaid-ramDeterministic-v0',
 'AirRaid-ramNoFrameskip-v0',
 'AirRaid-ram-v4',
 'AirRaid-ramDeterministic-v4',
 'AirRaid-ramNoFrameskip-v4',
 'Alien-v0',
 'AlienDeterministic-v0',
 'AlienNoFrameskip-v0',
 'Alien-v4',
 'AlienDeterministic-v4',
 'AlienNoFrameskip-v4',
 'Alien-ram-v0',
 'Alien-ramDeterministic-v0',
 'Alien-ramNoFrameskip-v0',
 'Alien-ram-v4',
 'Alien-ramDeterministic-v4',
 'Alien-ramNoFrameskip-v4',
 'Amidar-v0',
 'AmidarDeterministic-v0',
 'AmidarNoFrameskip-v0',
 'Amidar-v4',
 'AmidarDeterministic-v4',
 'AmidarNoFrameskip-v4',
 'Amidar-ram-v0',
 'Amidar-ramDeterministic-v0',
 'Amidar-ramNoFrameskip-v0',
 'Amidar-ram-v4',
 'Amidar-ramDeterministic-v4',
 'Amidar-ramNoFrameskip-v4',
 'Assault-v0',
 'AssaultDeterministic-v0',
 'AssaultNoFrameskip-v0',
 'Assault-v4',
 'AssaultDeterministic-v4',
 'AssaultNoFrameskip-v4',
 'Assault-ram-v0',
 'Assault-ramDeterministic-v0',
 'Assault-ramNoFrameskip-v0',
 'Assault-ram-v4',
 'Assault-ramDeterministic-v4',
 'Assault-ramNoFrameskip-v4',
 'Asterix-v0',
 'AsterixDeterministic-v0',
 'AsterixNoFrameskip-v0',
 'Asterix-v4',
 'AsterixDeterministic-v4',
 'AsterixNoFrameskip-v4',
 'Asterix-ram-v0',
 'Asterix-ramDeterministic-v0',
 'Asterix-ramNoFrameskip-v0',
 'Asterix-ram-v4',
 'Asterix-ramDeterministic-v4',
 'Asterix-ramNoFrameskip-v4',
 'Asteroids-v0',
 'AsteroidsDeterministic-v0',
 'AsteroidsNoFrameskip-v0',
 'Asteroids-v4',
 'AsteroidsDeterministic-v4',
 'AsteroidsNoFrameskip-v4',
 'Asteroids-ram-v0',
 'Asteroids-ramDeterministic-v0',
 'Asteroids-ramNoFrameskip-v0',
 'Asteroids-ram-v4',
 'Asteroids-ramDeterministic-v4',
 'Asteroids-ramNoFrameskip-v4',
 'Atlantis-v0',
 'AtlantisDeterministic-v0',
 'AtlantisNoFrameskip-v0',
 'Atlantis-v4',
 'AtlantisDeterministic-v4',
 'AtlantisNoFrameskip-v4',
 'Atlantis-ram-v0',
 'Atlantis-ramDeterministic-v0',
 'Atlantis-ramNoFrameskip-v0',
 'Atlantis-ram-v4',
 'Atlantis-ramDeterministic-v4',
 'Atlantis-ramNoFrameskip-v4',
 'BankHeist-v0',
 'BankHeistDeterministic-v0',
 'BankHeistNoFrameskip-v0',
 'BankHeist-v4',
 'BankHeistDeterministic-v4',
 'BankHeistNoFrameskip-v4',
 'BankHeist-ram-v0',
 'BankHeist-ramDeterministic-v0',
 'BankHeist-ramNoFrameskip-v0',
 'BankHeist-ram-v4',
 'BankHeist-ramDeterministic-v4',
 'BankHeist-ramNoFrameskip-v4',
 'BattleZone-v0',
 'BattleZoneDeterministic-v0',
 'BattleZoneNoFrameskip-v0',
 'BattleZone-v4',
 'BattleZoneDeterministic-v4',
 'BattleZoneNoFrameskip-v4',
 'BattleZone-ram-v0',
 'BattleZone-ramDeterministic-v0',
 'BattleZone-ramNoFrameskip-v0',
 'BattleZone-ram-v4',
 'BattleZone-ramDeterministic-v4',
 'BattleZone-ramNoFrameskip-v4',
 'BeamRider-v0',
 'BeamRiderDeterministic-v0',
 'BeamRiderNoFrameskip-v0',
 'BeamRider-v4',
 'BeamRiderDeterministic-v4',
 'BeamRiderNoFrameskip-v4',
 'BeamRider-ram-v0',
 'BeamRider-ramDeterministic-v0',
 'BeamRider-ramNoFrameskip-v0',
 'BeamRider-ram-v4',
 'BeamRider-ramDeterministic-v4',
 'BeamRider-ramNoFrameskip-v4',
 'Berzerk-v0',
 'BerzerkDeterministic-v0',
 'BerzerkNoFrameskip-v0',
 'Berzerk-v4',
 'BerzerkDeterministic-v4',
 'BerzerkNoFrameskip-v4',
 'Berzerk-ram-v0',
 'Berzerk-ramDeterministic-v0',
 'Berzerk-ramNoFrameskip-v0',
 'Berzerk-ram-v4',
 'Berzerk-ramDeterministic-v4',
 'Berzerk-ramNoFrameskip-v4',
 'Bowling-v0',
 'BowlingDeterministic-v0',
 'BowlingNoFrameskip-v0',
 'Bowling-v4',
 'BowlingDeterministic-v4',
 'BowlingNoFrameskip-v4',
 'Bowling-ram-v0',
 'Bowling-ramDeterministic-v0',
 'Bowling-ramNoFrameskip-v0',
 'Bowling-ram-v4',
 'Bowling-ramDeterministic-v4',
 'Bowling-ramNoFrameskip-v4',
 'Boxing-v0',
 'BoxingDeterministic-v0',
 'BoxingNoFrameskip-v0',
 'Boxing-v4',
 'BoxingDeterministic-v4',
 'BoxingNoFrameskip-v4',
 'Boxing-ram-v0',
 'Boxing-ramDeterministic-v0',
 'Boxing-ramNoFrameskip-v0',
 'Boxing-ram-v4',
 'Boxing-ramDeterministic-v4',
 'Boxing-ramNoFrameskip-v4',
 'Breakout-v0',
 'BreakoutDeterministic-v0',
 'BreakoutNoFrameskip-v0',
 'Breakout-v4',
 'BreakoutDeterministic-v4',
 'BreakoutNoFrameskip-v4',
 'Breakout-ram-v0',
 'Breakout-ramDeterministic-v0',
 'Breakout-ramNoFrameskip-v0',
 'Breakout-ram-v4',
 'Breakout-ramDeterministic-v4',
 'Breakout-ramNoFrameskip-v4',
 'Carnival-v0',
 'CarnivalDeterministic-v0',
 'CarnivalNoFrameskip-v0',
 'Carnival-v4',
 'CarnivalDeterministic-v4',
 'CarnivalNoFrameskip-v4',
 'Carnival-ram-v0',
 'Carnival-ramDeterministic-v0',
 'Carnival-ramNoFrameskip-v0',
 'Carnival-ram-v4',
 'Carnival-ramDeterministic-v4',
 'Carnival-ramNoFrameskip-v4',
 'Centipede-v0',
 'CentipedeDeterministic-v0',
 'CentipedeNoFrameskip-v0',
 'Centipede-v4',
 'CentipedeDeterministic-v4',
 'CentipedeNoFrameskip-v4',
 'Centipede-ram-v0',
 'Centipede-ramDeterministic-v0',
 'Centipede-ramNoFrameskip-v0',
 'Centipede-ram-v4',
 'Centipede-ramDeterministic-v4',
 'Centipede-ramNoFrameskip-v4',
 'ChopperCommand-v0',
 'ChopperCommandDeterministic-v0',
 'ChopperCommandNoFrameskip-v0',
 'ChopperCommand-v4',
 'ChopperCommandDeterministic-v4',
 'ChopperCommandNoFrameskip-v4',
 'ChopperCommand-ram-v0',
 'ChopperCommand-ramDeterministic-v0',
 'ChopperCommand-ramNoFrameskip-v0',
 'ChopperCommand-ram-v4',
 'ChopperCommand-ramDeterministic-v4',
 'ChopperCommand-ramNoFrameskip-v4',
 'CrazyClimber-v0',
 'CrazyClimberDeterministic-v0',
 'CrazyClimberNoFrameskip-v0',
 'CrazyClimber-v4',
 'CrazyClimberDeterministic-v4',
 'CrazyClimberNoFrameskip-v4',
 'CrazyClimber-ram-v0',
 'CrazyClimber-ramDeterministic-v0',
 'CrazyClimber-ramNoFrameskip-v0',
 'CrazyClimber-ram-v4',
 'CrazyClimber-ramDeterministic-v4',
 'CrazyClimber-ramNoFrameskip-v4',
 'Defender-v0',
 'DefenderDeterministic-v0',
 'DefenderNoFrameskip-v0',
 'Defender-v4',
 'DefenderDeterministic-v4',
 'DefenderNoFrameskip-v4',
 'Defender-ram-v0',
 'Defender-ramDeterministic-v0',
 'Defender-ramNoFrameskip-v0',
 'Defender-ram-v4',
 'Defender-ramDeterministic-v4',
 'Defender-ramNoFrameskip-v4',
 'DemonAttack-v0',
 'DemonAttackDeterministic-v0',
 'DemonAttackNoFrameskip-v0',
 'DemonAttack-v4',
 'DemonAttackDeterministic-v4',
 'DemonAttackNoFrameskip-v4',
 'DemonAttack-ram-v0',
 'DemonAttack-ramDeterministic-v0',
 'DemonAttack-ramNoFrameskip-v0',
 'DemonAttack-ram-v4',
 'DemonAttack-ramDeterministic-v4',
 'DemonAttack-ramNoFrameskip-v4',
 'DoubleDunk-v0',
 'DoubleDunkDeterministic-v0',
 'DoubleDunkNoFrameskip-v0',
 'DoubleDunk-v4',
 'DoubleDunkDeterministic-v4',
 'DoubleDunkNoFrameskip-v4',
 'DoubleDunk-ram-v0',
 'DoubleDunk-ramDeterministic-v0',
 'DoubleDunk-ramNoFrameskip-v0',
 'DoubleDunk-ram-v4',
 'DoubleDunk-ramDeterministic-v4',
 'DoubleDunk-ramNoFrameskip-v4',
 'ElevatorAction-v0',
 'ElevatorActionDeterministic-v0',
 'ElevatorActionNoFrameskip-v0',
 'ElevatorAction-v4',
 'ElevatorActionDeterministic-v4',
 'ElevatorActionNoFrameskip-v4',
 'ElevatorAction-ram-v0',
 'ElevatorAction-ramDeterministic-v0',
 'ElevatorAction-ramNoFrameskip-v0',
 'ElevatorAction-ram-v4',
 'ElevatorAction-ramDeterministic-v4',
 'ElevatorAction-ramNoFrameskip-v4',
 'Enduro-v0',
 'EnduroDeterministic-v0',
 'EnduroNoFrameskip-v0',
 'Enduro-v4',
 'EnduroDeterministic-v4',
 'EnduroNoFrameskip-v4',
 'Enduro-ram-v0',
 'Enduro-ramDeterministic-v0',
 'Enduro-ramNoFrameskip-v0',
 'Enduro-ram-v4',
 'Enduro-ramDeterministic-v4',
 'Enduro-ramNoFrameskip-v4',
 'FishingDerby-v0',
 'FishingDerbyDeterministic-v0',
 'FishingDerbyNoFrameskip-v0',
 'FishingDerby-v4',
 'FishingDerbyDeterministic-v4',
 'FishingDerbyNoFrameskip-v4',
 'FishingDerby-ram-v0',
 'FishingDerby-ramDeterministic-v0',
 'FishingDerby-ramNoFrameskip-v0',
 'FishingDerby-ram-v4',
 'FishingDerby-ramDeterministic-v4',
 'FishingDerby-ramNoFrameskip-v4',
 'Freeway-v0',
 'FreewayDeterministic-v0',
 'FreewayNoFrameskip-v0',
 'Freeway-v4',
 'FreewayDeterministic-v4',
 'FreewayNoFrameskip-v4',
 'Freeway-ram-v0',
 'Freeway-ramDeterministic-v0',
 'Freeway-ramNoFrameskip-v0',
 'Freeway-ram-v4',
 'Freeway-ramDeterministic-v4',
 'Freeway-ramNoFrameskip-v4',
 'Frostbite-v0',
 'FrostbiteDeterministic-v0',
 'FrostbiteNoFrameskip-v0',
 'Frostbite-v4',
 'FrostbiteDeterministic-v4',
 'FrostbiteNoFrameskip-v4',
 'Frostbite-ram-v0',
 'Frostbite-ramDeterministic-v0',
 'Frostbite-ramNoFrameskip-v0',
 'Frostbite-ram-v4',
 'Frostbite-ramDeterministic-v4',
 'Frostbite-ramNoFrameskip-v4',
 'Gopher-v0',
 'GopherDeterministic-v0',
 'GopherNoFrameskip-v0',
 'Gopher-v4',
 'GopherDeterministic-v4',
 'GopherNoFrameskip-v4',
 'Gopher-ram-v0',
 'Gopher-ramDeterministic-v0',
 'Gopher-ramNoFrameskip-v0',
 'Gopher-ram-v4',
 'Gopher-ramDeterministic-v4',
 'Gopher-ramNoFrameskip-v4',
 'Gravitar-v0',
 'GravitarDeterministic-v0',
 'GravitarNoFrameskip-v0',
 'Gravitar-v4',
 'GravitarDeterministic-v4',
 'GravitarNoFrameskip-v4',
 'Gravitar-ram-v0',
 'Gravitar-ramDeterministic-v0',
 'Gravitar-ramNoFrameskip-v0',
 'Gravitar-ram-v4',
 'Gravitar-ramDeterministic-v4',
 'Gravitar-ramNoFrameskip-v4',
 'Hero-v0',
 'HeroDeterministic-v0',
 'HeroNoFrameskip-v0',
 'Hero-v4',
 'HeroDeterministic-v4',
 'HeroNoFrameskip-v4',
 'Hero-ram-v0',
 'Hero-ramDeterministic-v0',
 'Hero-ramNoFrameskip-v0',
 'Hero-ram-v4',
 'Hero-ramDeterministic-v4',
 'Hero-ramNoFrameskip-v4',
 'IceHockey-v0',
 'IceHockeyDeterministic-v0',
 'IceHockeyNoFrameskip-v0',
 'IceHockey-v4',
 'IceHockeyDeterministic-v4',
 'IceHockeyNoFrameskip-v4',
 'IceHockey-ram-v0',
 'IceHockey-ramDeterministic-v0',
 'IceHockey-ramNoFrameskip-v0',
 'IceHockey-ram-v4',
 'IceHockey-ramDeterministic-v4',
 'IceHockey-ramNoFrameskip-v4',
 'Jamesbond-v0',
 'JamesbondDeterministic-v0',
 'JamesbondNoFrameskip-v0',
 'Jamesbond-v4',
 'JamesbondDeterministic-v4',
 'JamesbondNoFrameskip-v4',
 'Jamesbond-ram-v0',
 'Jamesbond-ramDeterministic-v0',
 'Jamesbond-ramNoFrameskip-v0',
 'Jamesbond-ram-v4',
 'Jamesbond-ramDeterministic-v4',
 'Jamesbond-ramNoFrameskip-v4',
 'JourneyEscape-v0',
 'JourneyEscapeDeterministic-v0',
 'JourneyEscapeNoFrameskip-v0',
 'JourneyEscape-v4',
 'JourneyEscapeDeterministic-v4',
 'JourneyEscapeNoFrameskip-v4',
 'JourneyEscape-ram-v0',
 'JourneyEscape-ramDeterministic-v0',
 'JourneyEscape-ramNoFrameskip-v0',
 'JourneyEscape-ram-v4',
 'JourneyEscape-ramDeterministic-v4',
 'JourneyEscape-ramNoFrameskip-v4',
 'Kangaroo-v0',
 'KangarooDeterministic-v0',
 'KangarooNoFrameskip-v0',
 'Kangaroo-v4',
 'KangarooDeterministic-v4',
 'KangarooNoFrameskip-v4',
 'Kangaroo-ram-v0',
 'Kangaroo-ramDeterministic-v0',
 'Kangaroo-ramNoFrameskip-v0',
 'Kangaroo-ram-v4',
 'Kangaroo-ramDeterministic-v4',
 'Kangaroo-ramNoFrameskip-v4',
 'Krull-v0',
 'KrullDeterministic-v0',
 'KrullNoFrameskip-v0',
 'Krull-v4',
 'KrullDeterministic-v4',
 'KrullNoFrameskip-v4',
 'Krull-ram-v0',
 'Krull-ramDeterministic-v0',
 'Krull-ramNoFrameskip-v0',
 'Krull-ram-v4',
 'Krull-ramDeterministic-v4',
 'Krull-ramNoFrameskip-v4',
 'KungFuMaster-v0',
 'KungFuMasterDeterministic-v0',
 'KungFuMasterNoFrameskip-v0',
 'KungFuMaster-v4',
 'KungFuMasterDeterministic-v4',
 'KungFuMasterNoFrameskip-v4',
 'KungFuMaster-ram-v0',
 'KungFuMaster-ramDeterministic-v0',
 'KungFuMaster-ramNoFrameskip-v0',
 'KungFuMaster-ram-v4',
 'KungFuMaster-ramDeterministic-v4',
 'KungFuMaster-ramNoFrameskip-v4',
 'MontezumaRevenge-v0',
 'MontezumaRevengeDeterministic-v0',
 'MontezumaRevengeNoFrameskip-v0',
 'MontezumaRevenge-v4',
 'MontezumaRevengeDeterministic-v4',
 'MontezumaRevengeNoFrameskip-v4',
 'MontezumaRevenge-ram-v0',
 'MontezumaRevenge-ramDeterministic-v0',
 'MontezumaRevenge-ramNoFrameskip-v0',
 'MontezumaRevenge-ram-v4',
 'MontezumaRevenge-ramDeterministic-v4',
 'MontezumaRevenge-ramNoFrameskip-v4',
 'MsPacman-v0',
 'MsPacmanDeterministic-v0',
 'MsPacmanNoFrameskip-v0',
 'MsPacman-v4',
 'MsPacmanDeterministic-v4',
 'MsPacmanNoFrameskip-v4',
 'MsPacman-ram-v0',
 'MsPacman-ramDeterministic-v0',
 'MsPacman-ramNoFrameskip-v0',
 'MsPacman-ram-v4',
 'MsPacman-ramDeterministic-v4',
 'MsPacman-ramNoFrameskip-v4',
 'NameThisGame-v0',
 'NameThisGameDeterministic-v0',
 'NameThisGameNoFrameskip-v0',
 'NameThisGame-v4',
 'NameThisGameDeterministic-v4',
 'NameThisGameNoFrameskip-v4',
 'NameThisGame-ram-v0',
 'NameThisGame-ramDeterministic-v0',
 'NameThisGame-ramNoFrameskip-v0',
 'NameThisGame-ram-v4',
 'NameThisGame-ramDeterministic-v4',
 'NameThisGame-ramNoFrameskip-v4',
 'Phoenix-v0',
 'PhoenixDeterministic-v0',
 'PhoenixNoFrameskip-v0',
 'Phoenix-v4',
 'PhoenixDeterministic-v4',
 'PhoenixNoFrameskip-v4',
 'Phoenix-ram-v0',
 'Phoenix-ramDeterministic-v0',
 'Phoenix-ramNoFrameskip-v0',
 'Phoenix-ram-v4',
 'Phoenix-ramDeterministic-v4',
 'Phoenix-ramNoFrameskip-v4',
 'Pitfall-v0',
 'PitfallDeterministic-v0',
 'PitfallNoFrameskip-v0',
 'Pitfall-v4',
 'PitfallDeterministic-v4',
 'PitfallNoFrameskip-v4',
 'Pitfall-ram-v0',
 'Pitfall-ramDeterministic-v0',
 'Pitfall-ramNoFrameskip-v0',
 'Pitfall-ram-v4',
 'Pitfall-ramDeterministic-v4',
 'Pitfall-ramNoFrameskip-v4',
 'Pong-v0',
 'PongDeterministic-v0',
 'PongNoFrameskip-v0',
 'Pong-v4',
 'PongDeterministic-v4',
 'PongNoFrameskip-v4',
 'Pong-ram-v0',
 'Pong-ramDeterministic-v0',
 'Pong-ramNoFrameskip-v0',
 'Pong-ram-v4',
 'Pong-ramDeterministic-v4',
 'Pong-ramNoFrameskip-v4',
 'Pooyan-v0',
 'PooyanDeterministic-v0',
 'PooyanNoFrameskip-v0',
 'Pooyan-v4',
 'PooyanDeterministic-v4',
 'PooyanNoFrameskip-v4',
 'Pooyan-ram-v0',
 'Pooyan-ramDeterministic-v0',
 'Pooyan-ramNoFrameskip-v0',
 'Pooyan-ram-v4',
 'Pooyan-ramDeterministic-v4',
 'Pooyan-ramNoFrameskip-v4',
 'PrivateEye-v0',
 'PrivateEyeDeterministic-v0',
 'PrivateEyeNoFrameskip-v0',
 'PrivateEye-v4',
 'PrivateEyeDeterministic-v4',
 'PrivateEyeNoFrameskip-v4',
 'PrivateEye-ram-v0',
 'PrivateEye-ramDeterministic-v0',
 'PrivateEye-ramNoFrameskip-v0',
 'PrivateEye-ram-v4',
 'PrivateEye-ramDeterministic-v4',
 'PrivateEye-ramNoFrameskip-v4',
 'Qbert-v0',
 'QbertDeterministic-v0',
 'QbertNoFrameskip-v0',
 'Qbert-v4',
 'QbertDeterministic-v4',
 'QbertNoFrameskip-v4',
 'Qbert-ram-v0',
 'Qbert-ramDeterministic-v0',
 'Qbert-ramNoFrameskip-v0',
 'Qbert-ram-v4',
 'Qbert-ramDeterministic-v4',
 'Qbert-ramNoFrameskip-v4',
 'Riverraid-v0',
 'RiverraidDeterministic-v0',
 'RiverraidNoFrameskip-v0',
 'Riverraid-v4',
 'RiverraidDeterministic-v4',
 'RiverraidNoFrameskip-v4',
 'Riverraid-ram-v0',
 'Riverraid-ramDeterministic-v0',
 'Riverraid-ramNoFrameskip-v0',
 'Riverraid-ram-v4',
 'Riverraid-ramDeterministic-v4',
 'Riverraid-ramNoFrameskip-v4',
 'RoadRunner-v0',
 'RoadRunnerDeterministic-v0',
 'RoadRunnerNoFrameskip-v0',
 'RoadRunner-v4',
 'RoadRunnerDeterministic-v4',
 'RoadRunnerNoFrameskip-v4',
 'RoadRunner-ram-v0',
 'RoadRunner-ramDeterministic-v0',
 'RoadRunner-ramNoFrameskip-v0',
 'RoadRunner-ram-v4',
 'RoadRunner-ramDeterministic-v4',
 'RoadRunner-ramNoFrameskip-v4',
 'Robotank-v0',
 'RobotankDeterministic-v0',
 'RobotankNoFrameskip-v0',
 'Robotank-v4',
 'RobotankDeterministic-v4',
 'RobotankNoFrameskip-v4',
 'Robotank-ram-v0',
 'Robotank-ramDeterministic-v0',
 'Robotank-ramNoFrameskip-v0',
 'Robotank-ram-v4',
 'Robotank-ramDeterministic-v4',
 'Robotank-ramNoFrameskip-v4',
 'Seaquest-v0',
 'SeaquestDeterministic-v0',
 'SeaquestNoFrameskip-v0',
 'Seaquest-v4',
 'SeaquestDeterministic-v4',
 'SeaquestNoFrameskip-v4',
 'Seaquest-ram-v0',
 'Seaquest-ramDeterministic-v0',
 'Seaquest-ramNoFrameskip-v0',
 'Seaquest-ram-v4',
 'Seaquest-ramDeterministic-v4',
 'Seaquest-ramNoFrameskip-v4',
 'Skiing-v0',
 'SkiingDeterministic-v0',
 'SkiingNoFrameskip-v0',
 'Skiing-v4',
 'SkiingDeterministic-v4',
 'SkiingNoFrameskip-v4',
 'Skiing-ram-v0',
 'Skiing-ramDeterministic-v0',
 'Skiing-ramNoFrameskip-v0',
 'Skiing-ram-v4',
 'Skiing-ramDeterministic-v4',
 'Skiing-ramNoFrameskip-v4',
 'Solaris-v0',
 'SolarisDeterministic-v0',
 'SolarisNoFrameskip-v0',
 'Solaris-v4',
 'SolarisDeterministic-v4',
 'SolarisNoFrameskip-v4',
 'Solaris-ram-v0',
 'Solaris-ramDeterministic-v0',
 'Solaris-ramNoFrameskip-v0',
 'Solaris-ram-v4',
 'Solaris-ramDeterministic-v4',
 'Solaris-ramNoFrameskip-v4',
 'SpaceInvaders-v0',
 'SpaceInvadersDeterministic-v0',
 'SpaceInvadersNoFrameskip-v0',
 'SpaceInvaders-v4',
 'SpaceInvadersDeterministic-v4',
 'SpaceInvadersNoFrameskip-v4',
 'SpaceInvaders-ram-v0',
 'SpaceInvaders-ramDeterministic-v0',
 'SpaceInvaders-ramNoFrameskip-v0',
 'SpaceInvaders-ram-v4',
 'SpaceInvaders-ramDeterministic-v4',
 'SpaceInvaders-ramNoFrameskip-v4',
 'StarGunner-v0',
 'StarGunnerDeterministic-v0',
 'StarGunnerNoFrameskip-v0',
 'StarGunner-v4',
 'StarGunnerDeterministic-v4',
 'StarGunnerNoFrameskip-v4',
 'StarGunner-ram-v0',
 'StarGunner-ramDeterministic-v0',
 'StarGunner-ramNoFrameskip-v0',
 'StarGunner-ram-v4',
 'StarGunner-ramDeterministic-v4',
 'StarGunner-ramNoFrameskip-v4',
 'Tennis-v0',
 'TennisDeterministic-v0',
 'TennisNoFrameskip-v0',
 'Tennis-v4',
 'TennisDeterministic-v4',
 'TennisNoFrameskip-v4',
 'Tennis-ram-v0',
 'Tennis-ramDeterministic-v0',
 'Tennis-ramNoFrameskip-v0',
 'Tennis-ram-v4',
 'Tennis-ramDeterministic-v4',
 'Tennis-ramNoFrameskip-v4',
 'TimePilot-v0',
 'TimePilotDeterministic-v0',
 'TimePilotNoFrameskip-v0',
 'TimePilot-v4',
 'TimePilotDeterministic-v4',
 'TimePilotNoFrameskip-v4',
 'TimePilot-ram-v0',
 'TimePilot-ramDeterministic-v0',
 'TimePilot-ramNoFrameskip-v0',
 'TimePilot-ram-v4',
 'TimePilot-ramDeterministic-v4',
 'TimePilot-ramNoFrameskip-v4',
 'Tutankham-v0',
 'TutankhamDeterministic-v0',
 'TutankhamNoFrameskip-v0',
 'Tutankham-v4',
 'TutankhamDeterministic-v4',
 'TutankhamNoFrameskip-v4',
 'Tutankham-ram-v0',
 'Tutankham-ramDeterministic-v0',
 'Tutankham-ramNoFrameskip-v0',
 'Tutankham-ram-v4',
 'Tutankham-ramDeterministic-v4',
 'Tutankham-ramNoFrameskip-v4',
 'UpNDown-v0',
 'UpNDownDeterministic-v0',
 'UpNDownNoFrameskip-v0',
 'UpNDown-v4',
 'UpNDownDeterministic-v4',
 'UpNDownNoFrameskip-v4',
 'UpNDown-ram-v0',
 'UpNDown-ramDeterministic-v0',
 'UpNDown-ramNoFrameskip-v0',
 'UpNDown-ram-v4',
 'UpNDown-ramDeterministic-v4',
 'UpNDown-ramNoFrameskip-v4',
 'Venture-v0',
 'VentureDeterministic-v0',
 'VentureNoFrameskip-v0',
 'Venture-v4',
 'VentureDeterministic-v4',
 'VentureNoFrameskip-v4',
 'Venture-ram-v0',
 'Venture-ramDeterministic-v0',
 'Venture-ramNoFrameskip-v0',
 'Venture-ram-v4',
 'Venture-ramDeterministic-v4',
 'Venture-ramNoFrameskip-v4',
 'VideoPinball-v0',
 'VideoPinballDeterministic-v0',
 'VideoPinballNoFrameskip-v0',
 'VideoPinball-v4',
 'VideoPinballDeterministic-v4',
 'VideoPinballNoFrameskip-v4',
 'VideoPinball-ram-v0',
 'VideoPinball-ramDeterministic-v0',
 'VideoPinball-ramNoFrameskip-v0',
 'VideoPinball-ram-v4',
 'VideoPinball-ramDeterministic-v4',
 'VideoPinball-ramNoFrameskip-v4',
 'WizardOfWor-v0',
 'WizardOfWorDeterministic-v0',
 'WizardOfWorNoFrameskip-v0',
 'WizardOfWor-v4',
 'WizardOfWorDeterministic-v4',
 'WizardOfWorNoFrameskip-v4',
 'WizardOfWor-ram-v0',
 'WizardOfWor-ramDeterministic-v0',
 'WizardOfWor-ramNoFrameskip-v0',
 'WizardOfWor-ram-v4',
 'WizardOfWor-ramDeterministic-v4',
 'WizardOfWor-ramNoFrameskip-v4',
 'YarsRevenge-v0',
 'YarsRevengeDeterministic-v0',
 'YarsRevengeNoFrameskip-v0',
 'YarsRevenge-v4',
 'YarsRevengeDeterministic-v4',
 'YarsRevengeNoFrameskip-v4',
 'YarsRevenge-ram-v0',
 'YarsRevenge-ramDeterministic-v0',
 'YarsRevenge-ramNoFrameskip-v0',
 'YarsRevenge-ram-v4',
 'YarsRevenge-ramDeterministic-v4',
 'YarsRevenge-ramNoFrameskip-v4',
 'Zaxxon-v0',
 'ZaxxonDeterministic-v0',
 'ZaxxonNoFrameskip-v0',
 'Zaxxon-v4',
 'ZaxxonDeterministic-v4',
 'ZaxxonNoFrameskip-v4',
 'Zaxxon-ram-v0',
 'Zaxxon-ramDeterministic-v0',
 'Zaxxon-ramNoFrameskip-v0',
 'Zaxxon-ram-v4',
 'Zaxxon-ramDeterministic-v4',
 'Zaxxon-ramNoFrameskip-v4',
 'CartPole-v0',
 'CartPole-v1',
 'MountainCar-v0',
 'MountainCarContinuous-v0',
 'Pendulum-v1',
 'Acrobot-v1',
 'LunarLander-v2',
 'LunarLanderContinuous-v2',
 'BipedalWalker-v3',
 'BipedalWalkerHardcore-v3',
 'CarRacing-v2',
 'Blackjack-v1',
 'FrozenLake-v1',
 'FrozenLake8x8-v1',
 'CliffWalking-v0',
 'Taxi-v3',
 'Reacher-v2',
 'Reacher-v4',
 'Pusher-v2',
 'Pusher-v4',
 'InvertedPendulum-v2',
 'InvertedPendulum-v4',
 'InvertedDoublePendulum-v2',
 'InvertedDoublePendulum-v4',
 'HalfCheetah-v2',
 'HalfCheetah-v3',
 'HalfCheetah-v4',
 'Hopper-v2',
 'Hopper-v3',
 'Hopper-v4',
 'Swimmer-v2',
 'Swimmer-v3',
 'Swimmer-v4',
 'Walker2d-v2',
 'Walker2d-v3',
 'Walker2d-v4',
 'Ant-v2',
 'Ant-v3',
 'Ant-v4',
 'Humanoid-v2',
 'Humanoid-v3',
 'Humanoid-v4',
 'HumanoidStandup-v2',
 'HumanoidStandup-v4']

Creating Space Invaders Environment¶

In [3]:
# Instantiate the Space Invaders environment that the rest of the notebook plays and trains on.
env = gym.make('SpaceInvaders-v0')
In [4]:
# Unpack the observation shape into frame height, width and colour channels;
# these values size the network's input layer later on.
height, width, channels = env.observation_space.shape
In [5]:
# Display the frame dimensions read from the environment.
height, width, channels
Out[5]:
(210, 160, 3)
In [6]:
# Number of discrete actions the environment accepts; used as the size of the network's output layer.
actions = env.action_space.n
In [7]:
# Display the action count.
actions
Out[7]:
6
In [8]:
# Show the human-readable name of each action index.
env.unwrapped.get_action_meanings()
Out[8]:
['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

Five Trial Episodes with Random Actions¶

In [9]:
# Number of random-play episodes to run as a sanity check of the environment.
episodes = 5
In [11]:
# Play `episodes` games with uniformly random actions to establish a baseline
# score before any learning happens.
# The action count is taken from the environment rather than hard-coding
# [0..5], so the loop stays correct if a different game is selected.
n_actions = env.action_space.n
for episode in range(1, episodes + 1):
    env.reset()
    done = False
    score = 0

    while not done:
        env.render()
        action = random.randrange(n_actions)
        # Only the reward and the terminal flag are needed for random play.
        _obs, reward, done, _info = env.step(action)
        score += reward
    print('Episode:{} Score:{}'.format(episode, score))
# NOTE(review): `env` is reused for training further down — confirm that
# close() only tears down the render window for this environment.
env.close()
Episode:1 Score:105.0
Episode:2 Score:170.0
Episode:3 Score:170.0
Episode:4 Score:210.0
Episode:5 Score:105.0

Create a Deep Learning Model with Keras¶

In [12]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam
2025-01-05 15:40:24.022285: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
In [20]:
def build_model(height, width, channels, actions, window_length=3):
    """Build the convolutional Q-network that maps stacked frames to action values.

    Args:
        height: Frame height in pixels.
        width: Frame width in pixels.
        channels: Number of colour channels per frame.
        actions: Number of discrete actions; sets the width of the linear
            output layer (one Q-value per action).
        window_length: Number of consecutive frames stacked into a single
            network input. Defaults to 3, the value that was previously
            hard-coded. It should match the ``window_length`` given to the
            agent's replay memory (3 in this notebook).

    Returns:
        An uncompiled ``Sequential`` model.
    """
    model = Sequential()
    # The leading `window_length` axis is kept through the conv stack (the
    # model summary shows it preserved) and only collapsed by Flatten.
    model.add(Convolution2D(32, (8, 8), strides=(4, 4), activation='relu',
                            input_shape=(window_length, height, width, channels)))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    # Linear activation: the outputs are unbounded value estimates, not probabilities.
    model.add(Dense(actions, activation='linear'))
    return model
In [21]:
# Build the Q-network from the frame dimensions and action count read from the environment.
model = build_model(height, width, channels, actions)
In [22]:
# Print each layer's output shape and parameter count.
model.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_3 (Conv2D)            (None, 3, 51, 39, 32)     6176      
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 3, 24, 18, 64)     32832     
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 3, 22, 16, 64)     36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 67584)             0         
_________________________________________________________________
dense_4 (Dense)              (None, 512)               34603520  
_________________________________________________________________
dense_5 (Dense)              (None, 256)               131328    
_________________________________________________________________
dense_6 (Dense)              (None, 6)                 1542      
=================================================================
Total params: 34,812,326
Trainable params: 34,812,326
Non-trainable params: 0
_________________________________________________________________

Build Agents with Keras Reinforcement Learning¶

In [23]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
In [ ]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
In [24]:
def build_agent(model, actions, window_length=3, memory_limit=1000,
                nb_steps_warmup=1000, anneal_steps=10000):
    """Assemble a dueling DQN agent around ``model``.

    The keyword arguments expose values that were previously hard-coded; the
    defaults reproduce the original configuration exactly.

    Args:
        model: The Q-network to train.
        actions: Number of discrete actions available to the agent.
        window_length: Frames per observation window held by the replay
            memory. Should match the leading dimension of the model's
            ``input_shape`` (3 in this notebook).
        memory_limit: Size limit passed to ``SequentialMemory``.
        nb_steps_warmup: Warm-up step count passed to ``DQNAgent``.
        anneal_steps: Number of steps over which the policy's ``eps``
            attribute is annealed from 1.0 down to 0.1.

    Returns:
        An uncompiled ``DQNAgent``.
    """
    # Epsilon-greedy exploration with a linearly annealed epsilon; 0.2 is the
    # value supplied for testing.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.2,
        nb_steps=anneal_steps,
    )
    memory = SequentialMemory(limit=memory_limit, window_length=window_length)
    dqn = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=nb_steps_warmup,
    )
    return dqn
In [25]:
# Wrap the Q-network in a DQN agent using the default exploration and memory settings.
dqn = build_agent(model, actions)
In [28]:
# Compile the agent with Adam. `learning_rate` is the supported keyword;
# `lr` is a deprecated alias in tf.keras optimizers.
dqn.compile(Adam(learning_rate=1e-4))

Fitting 10000 steps¶

In [18]:
# Short training run: 10,000 environment steps, rendering disabled, one log line per episode.
dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)
Training for 10000 steps ...
 1197/10000: episode: 1, duration: 16.708s, episode steps: 1197, steps per second:  72, episode reward: 275.000, mean reward:  0.230 [ 0.000, 30.000], mean action: 2.595 [0.000, 5.000],  loss: 27.396054, mean_q: 11.414030, mean_eps: 0.901135
 1890/10000: episode: 2, duration: 41.929s, episode steps: 693, steps per second:  17, episode reward: 135.000, mean reward:  0.195 [ 0.000, 30.000], mean action: 2.505 [0.000, 5.000],  loss: 1.129293, mean_q: 9.534999, mean_eps: 0.861130
 2441/10000: episode: 3, duration: 34.082s, episode steps: 551, steps per second:  16, episode reward: 140.000, mean reward:  0.254 [ 0.000, 30.000], mean action: 2.566 [0.000, 5.000],  loss: 1.161340, mean_q: 10.541823, mean_eps: 0.805150
 3449/10000: episode: 4, duration: 61.689s, episode steps: 1008, steps per second:  16, episode reward: 320.000, mean reward:  0.317 [ 0.000, 30.000], mean action: 2.569 [0.000, 5.000],  loss: 1.287153, mean_q: 11.357870, mean_eps: 0.734995
 4513/10000: episode: 5, duration: 64.572s, episode steps: 1064, steps per second:  16, episode reward: 195.000, mean reward:  0.183 [ 0.000, 30.000], mean action: 2.456 [0.000, 5.000],  loss: 0.497081, mean_q: 9.330520, mean_eps: 0.641755
 5310/10000: episode: 6, duration: 48.119s, episode steps: 797, steps per second:  17, episode reward: 180.000, mean reward:  0.226 [ 0.000, 30.000], mean action: 2.423 [0.000, 5.000],  loss: 0.531408, mean_q: 10.102600, mean_eps: 0.558010
 6025/10000: episode: 7, duration: 43.741s, episode steps: 715, steps per second:  16, episode reward: 125.000, mean reward:  0.175 [ 0.000, 25.000], mean action: 2.438 [0.000, 5.000],  loss: 0.468921, mean_q: 10.508261, mean_eps: 0.489970
 6561/10000: episode: 8, duration: 33.157s, episode steps: 536, steps per second:  16, episode reward: 135.000, mean reward:  0.252 [ 0.000, 30.000], mean action: 2.914 [0.000, 5.000],  loss: 0.732872, mean_q: 10.142298, mean_eps: 0.433675
 7091/10000: episode: 9, duration: 32.751s, episode steps: 530, steps per second:  16, episode reward: 85.000, mean reward:  0.160 [ 0.000, 25.000], mean action: 2.574 [0.000, 5.000],  loss: 0.413073, mean_q: 10.873431, mean_eps: 0.385705
 7683/10000: episode: 10, duration: 37.227s, episode steps: 592, steps per second:  16, episode reward: 90.000, mean reward:  0.152 [ 0.000, 25.000], mean action: 2.720 [0.000, 5.000],  loss: 0.232420, mean_q: 10.754884, mean_eps: 0.335215
 8301/10000: episode: 11, duration: 38.207s, episode steps: 618, steps per second:  16, episode reward: 120.000, mean reward:  0.194 [ 0.000, 30.000], mean action: 2.620 [0.000, 5.000],  loss: 0.504104, mean_q: 10.189123, mean_eps: 0.280765
 9127/10000: episode: 12, duration: 50.743s, episode steps: 826, steps per second:  16, episode reward: 180.000, mean reward:  0.218 [ 0.000, 25.000], mean action: 2.390 [0.000, 5.000],  loss: 0.271572, mean_q: 9.491021, mean_eps: 0.215785
 9672/10000: episode: 13, duration: 33.557s, episode steps: 545, steps per second:  16, episode reward: 20.000, mean reward:  0.037 [ 0.000, 10.000], mean action: 2.550 [0.000, 5.000],  loss: 0.200701, mean_q: 8.646260, mean_eps: 0.154090
done, took 556.993 seconds
Out[18]:
<tensorflow.python.keras.callbacks.History at 0x1ddc55f4ba8>

Saving the Weights¶

In [ ]:
# Persist the agent's weights after the short run.
# NOTE(review): confirm the SavedWeights/10k-Fast directory exists (or is created) before running.
dqn.save_weights('SavedWeights/10k-Fast/dqn_weights.h5f')

Fitting 1,000,000 steps¶

In [ ]:
# Long training run: 1,000,000 steps, matching the section heading and the
# 'SavedWeights/1m' output path (the previous value was ten million).
dqn.fit(env, nb_steps=1000000, visualize=False, verbose=2)

Saving the Weights¶

In [ ]:
# Persist the agent's weights after the long training run.
# NOTE(review): confirm the SavedWeights/1m directory exists (or is created) before running.
dqn.save_weights('SavedWeights/1m/dqn_weights.h5f')

Loading the weights¶

In [32]:
# Restore the weights written by the long training run so evaluation can proceed without retraining.
dqn.load_weights('SavedWeights/1m/dqn_weights.h5f')
In [15]:
# import sys
# sys.setrecursionlimit(100000)

Printing the average reward¶

In [31]:
# Evaluate the agent over ten rendered episodes, then report the mean of the
# per-episode rewards recorded in the returned history.
scores = dqn.test(env, nb_episodes=10, visualize=True)
episode_rewards = scores.history['episode_reward']
print("The Average reward is: ", np.mean(episode_rewards))
Testing for 10 episodes ...
Episode 1: reward: 200.000, steps: 668
Episode 2: reward: 345.000, steps: 918
Episode 3: reward: 165.000, steps: 670
Episode 4: reward: 230.000, steps: 868
Episode 5: reward: 300.000, steps: 1220
Episode 6: reward: 435.000, steps: 880
Episode 7: reward: 195.000, steps: 829
Episode 8: reward: 475.000, steps: 860
Episode 9: reward: 330.000, steps: 1420
Episode 10: reward: 225.000, steps: 924
The Average reward is:  290.0