From 261d0ef6cd6e123892652487551c2315cd29efea Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Thu, 29 Aug 2013 16:27:00 -0700 Subject: [PATCH] core: WaitForCommunicator - more robust wait for boot This is a new built-in middleware that is more robust for waiting for boots. The "max_tries" configuration is now gone, it is timeout based. Future commits will make this even better as the SSH communicator will implement the new "wait_for_ready" in a better way. --- config/default.rb | 3 +- lib/vagrant/action.rb | 1 + .../action/builtin/mixin_provisioners.rb | 4 +- lib/vagrant/action/builtin/provision.rb | 2 + .../action/builtin/provisioner_cleanup.rb | 8 +- .../action/builtin/wait_for_communicator.rb | 76 +++++++++++++++++++ lib/vagrant/errors.rb | 8 ++ lib/vagrant/plugin/v2/communicator.rb | 21 +++++ plugins/kernel_v2/config/ssh.rb | 5 +- plugins/providers/virtualbox/action.rb | 1 + plugins/providers/virtualbox/action/boot.rb | 26 ------- templates/locales/en.yml | 26 ++++++- 12 files changed, 143 insertions(+), 38 deletions(-) create mode 100644 lib/vagrant/action/builtin/wait_for_communicator.rb diff --git a/config/default.rb b/config/default.rb index 95cc6ab6c..c5476792d 100644 --- a/config/default.rb +++ b/config/default.rb @@ -5,8 +5,7 @@ Vagrant.configure("2") do |config| config.ssh.forward_x11 = false config.ssh.guest_port = 22 config.ssh.keep_alive = true - config.ssh.max_tries = 100 - config.ssh.timeout = 30 + config.ssh.timeout = 300 config.ssh.shell = "bash -l" config.ssh.default.username = "vagrant" diff --git a/lib/vagrant/action.rb b/lib/vagrant/action.rb index 67e71decc..52164b462 100644 --- a/lib/vagrant/action.rb +++ b/lib/vagrant/action.rb @@ -24,6 +24,7 @@ module Vagrant autoload :SetHostname, "vagrant/action/builtin/set_hostname" autoload :SSHExec, "vagrant/action/builtin/ssh_exec" autoload :SSHRun, "vagrant/action/builtin/ssh_run" + autoload :WaitForCommunicator, "vagrant/action/builtin/wait_for_communicator" end module General diff --git a/lib/vagrant/action/builtin/mixin_provisioners.rb b/lib/vagrant/action/builtin/mixin_provisioners.rb index aa303cb08..ff82619ca 100644 --- a/lib/vagrant/action/builtin/mixin_provisioners.rb +++ b/lib/vagrant/action/builtin/mixin_provisioners.rb @@ -13,10 +13,10 @@ module Vagrant @_provisioner_types = {} # Get all the configured provisioners - @_provisioner_instances = env[:machine].config.vm.provisioners.map do |provisioner| + @_provisioner_instances = @env[:machine].config.vm.provisioners.map do |provisioner| # Instantiate the provisioner klass = Vagrant.plugin("2").manager.provisioners[provisioner.name] - result = klass.new(env[:machine], provisioner.config) + result = klass.new(@env[:machine], provisioner.config) # Store in the type map so that --provision-with works properly @_provisioner_types[result] = provisioner.name diff --git a/lib/vagrant/action/builtin/provision.rb b/lib/vagrant/action/builtin/provision.rb index 8e4838cbb..dbb8f5dc1 100644 --- a/lib/vagrant/action/builtin/provision.rb +++ b/lib/vagrant/action/builtin/provision.rb @@ -20,6 +20,8 @@ module Vagrant end def call(env) + @env = env + # Check if we're even provisioning things. enabled = true enabled = env[:provision_enabled] if env.has_key?(:provision_enabled) diff --git a/lib/vagrant/action/builtin/provisioner_cleanup.rb b/lib/vagrant/action/builtin/provisioner_cleanup.rb index bebfbe11f..3d826291a 100644 --- a/lib/vagrant/action/builtin/provisioner_cleanup.rb +++ b/lib/vagrant/action/builtin/provisioner_cleanup.rb @@ -1,19 +1,25 @@ require "log4r" +require_relative "mixin_provisioners" + module Vagrant module Action module Builtin # This action will run the cleanup methods on provisioners and should # be used as part of any Destroy action. class ProvisionerCleanup + include MixinProvisioners + def initialize(app, env) @app = app @logger = Log4r::Logger.new("vagrant::action::builtin::provision_cleanup") end def call(env) + @env = env + # Ask the provisioners to modify the configuration if needed - provisioners.each do |p| + provisioner_instances.each do |p| env[:ui].info(I18n.t( "vagrant.provisioner_cleanup", name: provisioner_type_map[p].to_s)) diff --git a/lib/vagrant/action/builtin/wait_for_communicator.rb b/lib/vagrant/action/builtin/wait_for_communicator.rb new file mode 100644 index 000000000..462a39b1b --- /dev/null +++ b/lib/vagrant/action/builtin/wait_for_communicator.rb @@ -0,0 +1,76 @@ +module Vagrant + module Action + module Builtin + # This waits for the communicator to be ready for a set amount of + # time. + class WaitForCommunicator + def initialize(app, env, states=nil) + @app = app + @states = states + end + + def call(env) + # Wait for ready in a thread so that we can continually check + # for interrupts. + ready_thr = Thread.new do + Thread.current[:result] = env[:machine].communicate.wait_for_ready( + env[:machine].config.ssh.timeout) + end + + # Start a thread that verifies the VM stays in a good state. + states_thr = Thread.new do + Thread.current[:result] = true + + # If we aren't caring about states, just basically put this + # thread to sleep because it'll get killed later. + if !@states + while true + sleep 300 + end + + next + end + + # Otherwise, periodically verify the VM isn't in a bad state. + while true + state = env[:machine].provider.state.id + if !@states.include?(state) + Thread.current[:result] = false + break + end + end + end + + # Wait for a result or an interrupt + env[:ui].info I18n.t("vagrant.boot_waiting") + while ready_thr.alive? && states_thr.alive? + return if env[:interrupted] + end + + # If it went into a bad state, then raise an error + if !states_thr[:result] + raise Errors::VMBootBadState, + valid: @states.join(", "), + invalid: env[:machine].provider.state.id + end + + # If it didn't boot, raise an error + if !ready_thr[:result] + raise Errors::VMBootTimeout + end + + env[:ui].info I18n.t("vagrant.boot_completed") + + # Make sure our threads are all killed + ready_thr.kill + states_thr.kill + + @app.call(env) + ensure + ready_thr.kill + states_thr.kill + end + end + end + end +end diff --git a/lib/vagrant/errors.rb b/lib/vagrant/errors.rb index 41f4a8890..05898297a 100644 --- a/lib/vagrant/errors.rb +++ b/lib/vagrant/errors.rb @@ -543,6 +543,14 @@ module Vagrant error_key(:no_base_mac, "vagrant.actions.vm.match_mac") end + class VMBootBadState < VagrantError + error_key(:boot_bad_state) + end + + class VMBootTimeout < VagrantError + error_key(:boot_timeout) + end + class VMCustomizationFailed < VagrantError error_key(:failure, "vagrant.actions.vm.customize") end diff --git a/lib/vagrant/plugin/v2/communicator.rb b/lib/vagrant/plugin/v2/communicator.rb index 97c84d4d4..2f4d0b590 100644 --- a/lib/vagrant/plugin/v2/communicator.rb +++ b/lib/vagrant/plugin/v2/communicator.rb @@ -1,3 +1,5 @@ +require "timeout" + module Vagrant module Plugin module V2 @@ -44,6 +46,25 @@ module Vagrant false end + # wait_for_ready waits until the communicator is ready, blocking + # until then. It will wait up to the given duration or raise an + # exception if something goes wrong. + def wait_for_ready(duration) + # By default, we implement a naive solution. + begin + Timeout.timeout(duration) do + while true + return true if ready? + sleep 0.2 + end + end + rescue Timeout::Error + # We timed out, we failed. + end + + return false + end + # Download a file from the remote machine to the local machine. # # @param [String] from Path of the file on the remote machine. diff --git a/plugins/kernel_v2/config/ssh.rb b/plugins/kernel_v2/config/ssh.rb index 6ed8ff5ff..d6d5cdd84 100644 --- a/plugins/kernel_v2/config/ssh.rb +++ b/plugins/kernel_v2/config/ssh.rb @@ -9,7 +9,6 @@ module VagrantPlugins attr_accessor :forward_x11 attr_accessor :guest_port attr_accessor :keep_alive - attr_accessor :max_tries attr_accessor :shell attr_accessor :timeout @@ -22,7 +21,6 @@ module VagrantPlugins @forward_x11 = UNSET_VALUE @guest_port = UNSET_VALUE @keep_alive = UNSET_VALUE - @max_tries = UNSET_VALUE @shell = UNSET_VALUE @timeout = UNSET_VALUE @@ -43,7 +41,6 @@ module VagrantPlugins @forward_x11 = false if @forward_x11 == UNSET_VALUE @guest_port = nil if @guest_port == UNSET_VALUE @keep_alive = false if @keep_alive == UNSET_VALUE - @max_tries = nil if @max_tries == UNSET_VALUE @shell = nil if @shell == UNSET_VALUE @timeout = nil if @timeout == UNSET_VALUE @@ -57,7 +54,7 @@ module VagrantPlugins def validate(machine) errors = super - [:max_tries, :timeout].each do |field| + [:timeout].each do |field| value = instance_variable_get("@#{field}".to_sym) errors << I18n.t("vagrant.config.common.error_empty", :field => field) if !value end diff --git a/plugins/providers/virtualbox/action.rb b/plugins/providers/virtualbox/action.rb index 5608d8a23..eb03dd8ee 100644 --- a/plugins/providers/virtualbox/action.rb +++ b/plugins/providers/virtualbox/action.rb @@ -71,6 +71,7 @@ module VagrantPlugins b.use SaneDefaults b.use Customize, "pre-boot" b.use Boot + b.use WaitForCommunicator, [:starting, :running] b.use Customize, "post-boot" b.use CheckGuestAdditions end diff --git a/plugins/providers/virtualbox/action/boot.rb b/plugins/providers/virtualbox/action/boot.rb index 58b3a8b4b..589a49d69 100644 --- a/plugins/providers/virtualbox/action/boot.rb +++ b/plugins/providers/virtualbox/action/boot.rb @@ -14,35 +14,9 @@ module VagrantPlugins # Start up the VM and wait for it to boot. env[:ui].info I18n.t("vagrant.actions.vm.boot.booting") env[:machine].provider.driver.start(boot_mode) - raise Vagrant::Errors::VMFailedToBoot if !wait_for_boot @app.call(env) end - - def wait_for_boot - @env[:ui].info I18n.t("vagrant.actions.vm.boot.waiting") - - @env[:machine].config.ssh.max_tries.to_i.times do |i| - if @env[:machine].communicate.ready? - @env[:ui].info I18n.t("vagrant.actions.vm.boot.ready") - return true - end - - # Return true so that the vm_failed_to_boot error doesn't - # get shown - return true if @env[:interrupted] - - # If the VM is not starting or running, something went wrong - # and we need to show a useful error. - state = @env[:machine].provider.state.id - raise Vagrant::Errors::VMFailedToRun if state != :starting && state != :running - - sleep 2 if !@env["vagrant.test"] - end - - @env[:ui].error I18n.t("vagrant.actions.vm.boot.failed") - false - end end end end diff --git a/templates/locales/en.yml b/templates/locales/en.yml index fa2b7093a..0f9d7f74b 100644 --- a/templates/locales/en.yml +++ b/templates/locales/en.yml @@ -1,5 +1,9 @@ en: vagrant: + boot_completed: |- + Machine booted and ready! + boot_waiting: |- + Waiting for machine to boot. This may take a few minutes... cfengine_bootstrapping: |- Bootstrapping CFEngine with policy server: %{policy_server}... cfengine_bootstrapping_policy_hub: |- @@ -144,6 +148,25 @@ en: Any errors that occurred are shown below. %{message} + boot_bad_state: |- + The guest machine entered an invalid state while waiting for it + to boot. Valid states are '%{valid}'. The machine is in the + '%{invalid}' state. Please verify everything is configured + properly and try again. + boot_timeout: |- + Timed out while waiting for the machine to boot. This means that + Vagrant was unable to communicate with the guest machine within + the configured ("config.ssh.timeout" value) time period. This can + mean a number of things. + + If you're using a custom box, make sure that networking is properly + working and you're able to connect to the machine. It is a common + problem that networking isn't setup properly in these boxes. + Verify that authentication configurations are also setup properly, + as well. + + If the box appears to be booting properly, you may want to increase + the timeout ("config.ssh.timeout") value. box_config_changing_box: |- While loading the Vagrantfile, the provider override specified a new box. This box, in turn, specified a different box. This isn't @@ -815,9 +838,6 @@ en: vm: boot: booting: Booting VM... - waiting: Waiting for VM to boot. This can take a few minutes. - ready: VM booted and ready for use! - failed: Failed to connect to VM! failed_to_boot: |- Failed to connect to VM via SSH. Please verify the VM successfully booted by looking at the VirtualBox GUI.