[{"title":"(51个子文件69.27MB)深度强化学习DQN系列论文","children":[{"title":"DQN算法改进","children":[{"title":"DynamicFrameskipDeepQNetwork.pdf <span style='color:#111;'>588.35KB</span>","children":null,"spread":false},{"title":"IncreasingtheActionGapNewOperatorsforReinforcementLearning.pdf <span style='color:#111;'>979.22KB</span>","children":null,"spread":false},{"title":"DuelingNetworkArchitecturesforDeepReinforcementLearning.pdf <span style='color:#111;'>672.37KB</span>","children":null,"spread":false},{"title":"LearningtoPlayinaDayFasterDeepReinforcementLearningbyOptimalityTightening.pdf <span style='color:#111;'>1.18MB</span>","children":null,"spread":false},{"title":"SafeandEfficientOff-PolicyReinforcementLearning.pdf <span style='color:#111;'>556.93KB</span>","children":null,"spread":false},{"title":"MassivelyParallelMethodsforDeepReinforcementLearning.pdf <span style='color:#111;'>2.71MB</span>","children":null,"spread":false},{"title":"PrioritizedExperienceReplay.pdf <span style='color:#111;'>1.61MB</span>","children":null,"spread":false},{"title":"Averaged-DQNVarianceReductionandStabilizationforDeepReinforcementLearning.pdf <span style='color:#111;'>920.65KB</span>","children":null,"spread":false},{"title":"DeepReinforcementLearningwithDoubleQ-learning.pdf <span style='color:#111;'>770.57KB</span>","children":null,"spread":false},{"title":"DeepExplorationviaBootstrappedDQN.pdf <span style='color:#111;'>6.56MB</span>","children":null,"spread":false},{"title":"Learningfunctionsacrossmanyordersofmagnitudes.pdf <span style='color:#111;'>803.88KB</span>","children":null,"spread":false},{"title":"ThePredictronEnd-To-EndLearningandPlanning.pdf <span style='color:#111;'>1.74MB</span>","children":null,"spread":false},{"title":"HowtoDiscountDeepReinforcementLearningTowardsNewDynamicStrategies.pdf <span style='color:#111;'>1.02MB</span>","children":null,"spread":false},{"title":"StateoftheArtControlofAtariGamesUsingShallowReinforcementLearning.pdf <span style='color:#111;'>802.04KB</span>","children":null,"spread":false}],"spread":false},{"title":"DQN模型改进","children":[{"title":"HierarchicalDeepReinforcementLearningIntegratingTemporalAbstractionandIntrinsicMotivation.pdf <span style='color:#111;'>1.31MB</span>","children":null,"spread":false},{"title":"StrategicAttentiveWriterforLearningMacro-Actions.pdf <span style='color:#111;'>718.23KB</span>","children":null,"spread":false},{"title":"ProgressiveNeuralNetworks.pdf <span style='color:#111;'>4.08MB</span>","children":null,"spread":false},{"title":"LanguageUnderstandingforText-basedGamesUsingDeepReinforcementLearning.pdf <span style='color:#111;'>597.91KB</span>","children":null,"spread":false},{"title":"RecurrentReinforcementLearningAHybridApproach.pdf <span style='color:#111;'>430.63KB</span>","children":null,"spread":false},{"title":"ValueIterationNetworks.pdf <span style='color:#111;'>525.18KB</span>","children":null,"spread":false},{"title":"DeepRecurrentQ-LearningforPartiallyObservableMDPs.pdf <span style='color:#111;'>823.38KB</span>","children":null,"spread":false},{"title":"MazeBaseASandboxforLearningfromGames.pdf <span style='color:#111;'>394.73KB</span>","children":null,"spread":false},{"title":"ControlofMemory,ActivePerception,andActioninMinecraft.pdf <span style='color:#111;'>7.74MB</span>","children":null,"spread":false},{"title":"DeepAttentionRecurrentQ-Network.pdf <span style='color:#111;'>308.84KB</span>","children":null,"spread":false},{"title":"LearningtoCommunicatetoSolveRiddleswithDeepDistributedRecurrentQ-Networks.pdf <span 
Policy-gradient-based deep reinforcement learning

- DeepReinforcementLearninginParameterizedActionSpace.pdf (559.33 KB)
- EfficientExplorationforDialoguePolicyLearningwithBBQNetworks&ReplayBufferSpiking.pdf (657.07 KB)
- CombiningpolicygradientandQ-learning.pdf (1.19 MB)
- LearningDeepControlPoliciesforAutonomousAerialVehicleswithMPC-GuidedPolicySearch.pdf (860.74 KB)
- SampleEfficientActor-CriticwithExperienceReplay.pdf (1.38 MB)
- DeterministicPolicyGradientAlgorithms.pdf (335.61 KB)
- End-to-EndTrainingofDeepVisuomotorPolicies.pdf (4.51 MB)
- TrustRegionPolicyOptimization.pdf (1000.39 KB)
- Continuouscontrolwithdeepreinforcementlearning.pdf (648.14 KB)
- CompatibleValueGradientsforReinforcementLearningofContinuousDeepPolicies(1).pdf (1.04 MB)
- InteractiveControlofDiverseComplexCharacterswithNeuralNetworks.pdf (882.15 KB)
- Memory-basedcontrolwithrecurrentneuralnetworks.pdf (677.66 KB)
- CompatibleValueGradientsforReinforcementLearningofContinuousDeepPolicies.pdf (1.04 MB)
- Q-PropSample-EfficientPolicyGradientwithAnOff-PolicyCritic.pdf (830.90 KB)
- LearningContinuousControlPoliciesbyStochasticValueGradients.pdf (834.26 KB)
- ContinuousDeepQ-LearningwithModel-basedAcceleration.pdf (1.63 MB)
- Terrain-AdaptiveLocomotionSkillsUsingDeepReinforcementLearning.pdf (8.41 MB)
- GradientEstimationUsingStochasticComputationGraphs.pdf (433.09 KB)
- BenchmarkingDeepReinforcementLearningforContinuousControl.pdf (1.17 MB)
- High-DimensionalContinuousControlUsingGeneralizedAdvantageEstimation.pdf (1.71 MB)
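Most of the continuous-control papers in this category (Trust Region Policy Optimization, Q-Prop, the GAE paper) weight the policy gradient by an advantage estimate. The last paper above introduces the lambda-weighted estimator sketched below in NumPy; the rewards and value estimates are invented for illustration.

```python
import numpy as np

# Made-up per-step rewards and value estimates for one trajectory of length 4.
rewards = np.array([1.0, 0.0, 0.5, 1.0])
values = np.array([0.9, 0.8, 0.7, 0.6, 0.0])  # V(s_0)..V(s_T); terminal value 0
gamma, lam = 0.99, 0.95

# TD residuals: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
deltas = rewards + gamma * values[1:] - values[:-1]

# Generalized Advantage Estimation, computed backwards:
# A_t = delta_t + (gamma * lam) * A_{t+1}
advantages = np.zeros_like(rewards)
acc = 0.0
for t in reversed(range(len(rewards))):
    acc = deltas[t] + gamma * lam * acc
    advantages[t] = acc

print(advantages)  # lam=0 recovers one-step TD; lam=1 recovers Monte Carlo
```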
Hierarchical DRL

- StochasticNeuralNetworksforHierarchicalReinforcementLearning.pdf (3.08 MB)
- HierarchicalDeepReinforcementLearningIntegratingTemporalAbstractionandIntrinsicMotivation.pdf (1.31 MB)
- DeepSuccessorReinforcementLearning.pdf (2.14 MB)
- HierarchicalReinforcementLearningusingSpatio-TemporalAbstractionsandDeepNeuralNetworks.pdf (1.15 MB)

DQN founding papers

- PlayingAtariwithDeepReinforcementLearning.pdf (425.39 KB)
- Human-levelcontrolthroughdeepreinforcementlearning.pdf (4.39 MB)
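The two founding papers contribute the ingredients every later variant builds on: uniform experience replay (the Atari paper) and a periodically frozen target copy of Q (the Nature paper). Below is a runnable tabular toy in NumPy combining both on a 5-state chain MDP; it is a sketch of the mechanism only, not the papers' convolutional Atari setup.

```python
import random
import numpy as np

# Toy tabular sketch: epsilon-greedy acting, uniform replay, frozen target copy.
n_states, n_actions, gamma, alpha, eps = 5, 2, 0.9, 0.1, 0.1
Q = np.zeros((n_states, n_actions))
Q_frozen = Q.copy()
buffer = []
rng = random.Random(0)

def step(s, a):
    """Action 1 moves right, action 0 moves left; reaching the right end
    pays reward 1 and ends the episode."""
    s2 = min(s + 1, n_states - 1) if a == 1 else max(s - 1, 0)
    done = s2 == n_states - 1
    return s2, float(done), done

s = 0
for t in range(2000):
    a = rng.randrange(n_actions) if rng.random() < eps else int(Q[s].argmax())
    s2, r, done = step(s, a)
    buffer.append((s, a, r, s2, done))
    # Replay: update on a uniformly sampled past transition, breaking the
    # correlation between consecutive updates.
    ps, pa, pr, ps2, pdone = rng.choice(buffer)
    target = pr + (0.0 if pdone else gamma * Q_frozen[ps2].max())
    Q[ps, pa] += alpha * (target - Q[ps, pa])
    if t % 100 == 0:
        Q_frozen = Q.copy()  # refresh the frozen target copy
    s = 0 if done else s2

print(np.round(Q, 2))  # values grow toward the rewarding right end of the chain
```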