Merge pull request #4943 from maxlinc/winrm_error_handling

Improved WinRM error handling (including better `ready?` and `wait_for_ready`)
This commit is contained in:
Shawn Neal 2015-02-16 10:16:14 -08:00
commit 92762eeb8c
8 changed files with 218 additions and 59 deletions

View File

@ -25,22 +25,89 @@ module VagrantPlugins
@logger.info("Initializing WinRMCommunicator") @logger.info("Initializing WinRMCommunicator")
end end
# Blocks until the machine answers over WinRM or `timeout` seconds elapse.
#
# The method first polls `Helper.winrm_info` every half second until it
# returns something truthy, then prints the connection details and keeps
# calling `ready?`. Each retriable failure is reported to the user as a
# warning; an identical warning is only repeated after more than 10 seconds.
# Errors that are not part of the rescue chain below propagate to the caller.
#
# @param timeout upper bound, handed straight to `Timeout.timeout`
# @return [Boolean] true as soon as `ready?` succeeds, false when the
#   timeout expires first
def wait_for_ready(timeout)
  Timeout.timeout(timeout) do
    # Hold here until connection info is available for the machine.
    sleep 0.5 until Helper.winrm_info(@machine)

    # Connection info exists now; tell the user what we are connecting to.
    @machine.ui.detail("WinRM address: #{shell.host}:#{shell.port}")
    @machine.ui.detail("WinRM username: #{shell.username}")
    @machine.ui.detail("WinRM transport: #{shell.config.transport}")

    shown_warning = nil
    shown_warning_at = 0

    while true
      # `warning` ends up nil when `ready?` simply reported "not yet",
      # otherwise it holds the text describing the retriable failure.
      warning =
        begin
          begin
            return true if ready?
          rescue Vagrant::Errors::VagrantError => failure
            # Log every Vagrant error, then re-raise so the outer rescue
            # chain decides whether it is one we keep retrying on.
            @logger.info("WinRM not ready: #{failure.inspect}")
            raise
          end
          nil
        rescue Errors::ConnectionTimeout
          "Connection timeout."
        rescue Errors::AuthenticationFailed
          "Authentication failure."
        rescue Errors::Disconnected
          "Remote connection disconnect."
        rescue Errors::ConnectionRefused
          "Connection refused."
        rescue Errors::ConnectionReset
          "Connection reset."
        rescue Errors::HostDown
          "Host appears down."
        rescue Errors::NoRoute
          "Host unreachable."
        rescue Errors::TransientError => failure
          # Any other retriable errors
          failure.message
        end

      next unless warning

      # Show a warning when it differs from the last one shown, or when
      # the same warning has not been printed for more than 10 seconds.
      now = Time.now.to_f
      is_new = shown_warning != warning
      next unless is_new || (now - shown_warning_at) > 10.0

      @machine.ui.detail("Warning: #{warning} Retrying...")
      shown_warning = warning
      shown_warning_at = now
    end
  end
rescue Timeout::Error
  return false
end
def ready? def ready?
@logger.info("Checking whether WinRM is ready...") @logger.info("Checking whether WinRM is ready...")
Timeout.timeout(@machine.config.winrm.timeout) do result = Timeout.timeout(@machine.config.winrm.timeout) do
shell(true).powershell("hostname") shell(true).powershell("hostname")
end end
@logger.info("WinRM is ready!") @logger.info("WinRM is ready!")
return true return true
rescue Vagrant::Errors::VagrantError => e rescue Errors::TransientError => e
# We catch a `VagrantError` which would signal that something went # We catch a `TransientError` which would signal that something went
# wrong expectedly in the `connect`, which means we didn't connect. # wrong, but that might work if we wait and retry.
@logger.info("WinRM not up: #{e.inspect}") @logger.info("WinRM not up: #{e.inspect}")
# We reset the shell to trigger calling of winrm_finder again. # We reset the shell to trigger calling of winrm_finder again.
# This resolves a problem when using vSphere where the ssh_info was not refreshing # This resolves a problem when using vSphere where the winrm_info was not refreshing
# thus never getting the correct hostname. # thus never getting the correct hostname.
@shell = nil @shell = nil
return false return false

View File

@ -7,6 +7,7 @@ module VagrantPlugins
attr_accessor :port attr_accessor :port
attr_accessor :guest_port attr_accessor :guest_port
attr_accessor :max_tries attr_accessor :max_tries
attr_accessor :retry_delay
attr_accessor :timeout attr_accessor :timeout
attr_accessor :transport attr_accessor :transport
attr_accessor :ssl_peer_verification attr_accessor :ssl_peer_verification
@ -18,6 +19,7 @@ module VagrantPlugins
@port = UNSET_VALUE @port = UNSET_VALUE
@guest_port = UNSET_VALUE @guest_port = UNSET_VALUE
@max_tries = UNSET_VALUE @max_tries = UNSET_VALUE
@retry_delay = UNSET_VALUE
@timeout = UNSET_VALUE @timeout = UNSET_VALUE
@transport = UNSET_VALUE @transport = UNSET_VALUE
@ssl_peer_verification = UNSET_VALUE @ssl_peer_verification = UNSET_VALUE
@ -31,20 +33,22 @@ module VagrantPlugins
is_ssl = @transport == :ssl is_ssl = @transport == :ssl
@port = (is_ssl ? 5986 : 5985) if @port == UNSET_VALUE @port = (is_ssl ? 5986 : 5985) if @port == UNSET_VALUE
@guest_port = (is_ssl ? 5986 : 5985) if @guest_port == UNSET_VALUE @guest_port = (is_ssl ? 5986 : 5985) if @guest_port == UNSET_VALUE
@max_tries = 20 if @max_tries == UNSET_VALUE @max_tries = 20 if @max_tries == UNSET_VALUE
@timeout = 1800 if @timeout == UNSET_VALUE @retry_delay = 2 if @retry_delay == UNSET_VALUE
@timeout = 1800 if @timeout == UNSET_VALUE
@ssl_peer_verification = true if @ssl_peer_verification == UNSET_VALUE @ssl_peer_verification = true if @ssl_peer_verification == UNSET_VALUE
end end
def validate(machine) def validate(machine)
errors = [] errors = []
errors << "winrm.username cannot be nil." if @username.nil? errors << "winrm.username cannot be nil." if @username.nil?
errors << "winrm.password cannot be nil." if @password.nil? errors << "winrm.password cannot be nil." if @password.nil?
errors << "winrm.port cannot be nil." if @port.nil? errors << "winrm.port cannot be nil." if @port.nil?
errors << "winrm.guest_port cannot be nil." if @guest_port.nil? errors << "winrm.guest_port cannot be nil." if @guest_port.nil?
errors << "winrm.max_tries cannot be nil." if @max_tries.nil? errors << "winrm.max_tries cannot be nil." if @max_tries.nil?
errors << "winrm.timeout cannot be nil." if @timeout.nil? errors << "winrm.retry_delay cannot be nil." if @retry_delay.nil?
errors << "winrm.timeout cannot be nil." if @timeout.nil?
unless @ssl_peer_verification == true || @ssl_peer_verification == false unless @ssl_peer_verification == true || @ssl_peer_verification == false
errors << "winrm.ssl_peer_verification must be a boolean." errors << "winrm.ssl_peer_verification must be a boolean."
end end

View File

@ -6,8 +6,11 @@ module VagrantPlugins
error_namespace("vagrant_winrm.errors") error_namespace("vagrant_winrm.errors")
end end
class AuthError < WinRMError class TransientError < WinRMError
error_key(:auth_error) end
class AuthenticationFailed < WinRMError
error_key(:authentication_failed)
end end
class ExecutionError < WinRMError class ExecutionError < WinRMError
@ -29,6 +32,38 @@ module VagrantPlugins
class WinRMFileTransferError < WinRMError class WinRMFileTransferError < WinRMError
error_key(:winrm_file_transfer_error) error_key(:winrm_file_transfer_error)
end end
# Error whose message key is :invalid_transport; the matching locale text
# reports that %{transport} is not a supported WinRM transport.
class InvalidTransport < WinRMError
error_key(:invalid_transport)
end
# Raised by raise_winrm_exception for OpenSSL::SSL::SSLError. It does not
# derive from TransientError, so it is not among the errors wait_for_ready
# retries on.
class SSLError < WinRMError
error_key(:ssl_error)
end
# Transient: raised by raise_winrm_exception for HTTPClient::TimeoutError;
# wait_for_ready reports it as "Connection timeout." and retries.
class ConnectionTimeout < TransientError
error_key(:connection_timeout)
end
# Transient: wait_for_ready reports it as "Remote connection disconnect."
# and retries.
class Disconnected < TransientError
error_key(:disconnected)
end
# Transient: raised by raise_winrm_exception for Errno::ECONNREFUSED.
class ConnectionRefused < TransientError
error_key(:connection_refused)
end
# Transient: raised by raise_winrm_exception for Errno::ECONNRESET.
class ConnectionReset < TransientError
error_key(:connection_reset)
end
# Transient: raised by raise_winrm_exception for Errno::EHOSTDOWN.
class HostDown < TransientError
error_key(:host_down)
end
# Transient: raised by raise_winrm_exception for Errno::EHOSTUNREACH.
class NoRoute < TransientError
error_key(:no_route)
end
end end
end end
end end

View File

@ -22,6 +22,7 @@ module VagrantPlugins
@@exceptions_to_retry_on = [ @@exceptions_to_retry_on = [
HTTPClient::KeepAliveDisconnected, HTTPClient::KeepAliveDisconnected,
WinRM::WinRMHTTPTransportError, WinRM::WinRMHTTPTransportError,
WinRM::WinRMAuthorizationError,
Errno::EACCES, Errno::EACCES,
Errno::EADDRINUSE, Errno::EADDRINUSE,
Errno::ECONNREFUSED, Errno::ECONNREFUSED,
@ -87,7 +88,7 @@ module VagrantPlugins
end end
def execute_shell_with_retry(command, shell, &block) def execute_shell_with_retry(command, shell, &block)
retryable(tries: @config.max_tries, on: @@exceptions_to_retry_on, sleep: 10) do retryable(tries: @config.max_tries, on: @@exceptions_to_retry_on, sleep: @config.retry_delay) do
@logger.debug("#{shell} executing:\n#{command}") @logger.debug("#{shell} executing:\n#{command}")
output = session.send(shell, command) do |out, err| output = session.send(shell, command) do |out, err|
block.call(:stdout, out) if block_given? && out block.call(:stdout, out) if block_given? && out
@ -114,20 +115,42 @@ module VagrantPlugins
end end
end end
def raise_winrm_exception(winrm_exception, shell, command) def raise_winrm_exception(exception, shell = nil, command = nil)
# If the error is a 401, we can return a more specific error message case exception
if winrm_exception.message.include?("401") when WinRM::WinRMAuthorizationError
raise Errors::AuthError, raise Errors::AuthenticationFailed,
user: @username, user: @config.username,
password: @password, password: @config.password,
endpoint: endpoint, endpoint: endpoint,
message: winrm_exception.message message: exception.message
when WinRM::WinRMHTTPTransportError
raise Errors::ExecutionError,
shell: shell,
command: command,
message: exception.message
when OpenSSL::SSL::SSLError
raise Errors::SSLError, message: exception.message
when HTTPClient::TimeoutError
raise Errors::ConnectionTimeout, message: exception.message
when Errno::ECONNREFUSED
# This is raised if we failed to connect the max amount of times
raise Errors::ConnectionRefused
when Errno::ECONNRESET
# This is raised if we failed to connect the max number of times
# due to an ECONNRESET.
raise Errors::ConnectionReset
when Errno::EHOSTDOWN
# This is raised if we get an ICMP DestinationUnknown error.
raise Errors::HostDown
when Errno::EHOSTUNREACH
# This is raised if we can't work out how to route traffic.
raise Errors::NoRoute
else
raise Errors::ExecutionError,
shell: shell,
command: command,
message: exception.message
end end
raise Errors::ExecutionError,
shell: shell,
command: command,
message: winrm_exception.message
end end
def new_session def new_session

View File

@ -1,11 +1,10 @@
en: en:
vagrant_winrm: vagrant_winrm:
errors: errors:
auth_error: |- authentication_failed: |-
An authorization error occurred while connecting to WinRM. An authorization error occurred while connecting to WinRM.
User: %{user} User: %{user}
Password: %{password}
Endpoint: %{endpoint} Endpoint: %{endpoint}
Message: %{message} Message: %{message}
winrm_bad_exit_status: |- winrm_bad_exit_status: |-
@ -29,6 +28,15 @@ en:
Message: %{message} Message: %{message}
invalid_shell: |- invalid_shell: |-
%{shell} is not a supported type of Windows shell. %{shell} is not a supported type of Windows shell.
invalid_transport: |-
%{transport} is not a supported WinRM transport.
ssl_error: |-
An SSL error occurred while connecting to WinRM. This usually
occurs when you are using a self-signed certificate and have
not set the WinRM `ssl_peer_verification` config setting to false.
Message: %{message}
winrm_not_ready: |- winrm_not_ready: |-
The box is not able to report an address for WinRM to connect to yet. The box is not able to report an address for WinRM to connect to yet.
WinRM cannot access this Vagrant environment. Please wait for the WinRM cannot access this Vagrant environment. Please wait for the
@ -39,3 +47,36 @@ en:
From: %{from} From: %{from}
To: %{to} To: %{to}
Message: %{message} Message: %{message}
connection_refused: |-
WinRM connection was refused! This usually happens if the VM failed to
boot properly. Some steps to try to fix this: First, try reloading your
VM with `vagrant reload`, since a simple restart sometimes fixes things.
If that doesn't work, destroy your VM and recreate it with a `vagrant destroy`
followed by a `vagrant up`. If that doesn't work, contact a Vagrant
maintainer (support channels listed on the website) for more assistance.
connection_reset: |-
WinRM connection was reset! This usually happens when the machine is
taking too long to reboot. First, try reloading your machine with
`vagrant reload`, since a simple restart sometimes fixes things.
If that doesn't work, destroy your machine and recreate it with
a `vagrant destroy` followed by a `vagrant up`. If that doesn't work,
contact support.
connection_timeout: |-
Vagrant timed out while attempting to connect via WinRM. This usually
means that the VM booted, but there are issues with the WinRM configuration
or network connectivity issues. Please try to `vagrant reload` or
`vagrant up` again.
disconnected: |-
The WinRM connection was unexpectedly closed by the remote end. This
usually indicates that WinRM within the guest machine was unable to
properly start up. Please boot the VM in GUI mode to check whether
it is booting properly.
no_route: |-
While attempting to connect with WinRM, a "no route to host" (EHOSTUNREACH)
error was received. Please verify your network settings are correct
and try again.
host_down: |-
While attempting to connect with WinRM, a "host is down" (EHOSTDOWN)
error was received. Please verify your WinRM settings are correct
and try again.

View File

@ -28,11 +28,16 @@ describe VagrantPlugins::CommunicatorWinRM::Communicator do
expect(subject.ready?).to be_true expect(subject.ready?).to be_true
end end
it "returns false if hostname command fails to execute without error" do it "returns false if hostname command fails with a transient error" do
expect(shell).to receive(:powershell).with("hostname").and_raise(Vagrant::Errors::VagrantError) expect(shell).to receive(:powershell).with("hostname").and_raise(VagrantPlugins::CommunicatorWinRM::Errors::TransientError)
expect(subject.ready?).to be_false expect(subject.ready?).to be_false
end end
it "raises an error if hostname command fails with an unknown error" do
expect(shell).to receive(:powershell).with("hostname").and_raise(Vagrant::Errors::VagrantError)
expect { subject.ready? }.to raise_error(Vagrant::Errors::VagrantError)
end
it "raises timeout error when hostname command takes longer then winrm timeout" do it "raises timeout error when hostname command takes longer then winrm timeout" do
expect(shell).to receive(:powershell).with("hostname") do expect(shell).to receive(:powershell).with("hostname") do
sleep 2 # winrm.timeout = 1 sleep 2 # winrm.timeout = 1

View File

@ -12,6 +12,8 @@ describe VagrantPlugins::CommunicatorWinRM::WinRMShell do
VagrantPlugins::CommunicatorWinRM::Config.new.tap do |c| VagrantPlugins::CommunicatorWinRM::Config.new.tap do |c|
c.username = 'username' c.username = 'username'
c.password = 'password' c.password = 'password'
c.max_tries = 3
c.retry_delay = 0
c.finalize! c.finalize!
end end
} }
@ -28,11 +30,15 @@ describe VagrantPlugins::CommunicatorWinRM::WinRMShell do
expect(subject.powershell("dir")[:exitcode]).to eq(0) expect(subject.powershell("dir")[:exitcode]).to eq(0)
end end
it "should raise auth error when exception message contains 401" do it "should retry when a WinRMAuthorizationError is received" do
expect(session).to receive(:powershell).with(/^dir.+/).and_raise( expect(session).to receive(:powershell).with(/^dir.+/).exactly(3).times.and_raise(
StandardError.new("Oh no! a 401 SOAP error!")) # Note: The initialize for WinRMAuthorizationError may require a status_code as
# the second argument in a future WinRM release. Currently it doesn't track the
# status code.
WinRM::WinRMAuthorizationError.new("Oh no!! Unauthrorized")
)
expect { subject.powershell("dir") }.to raise_error( expect { subject.powershell("dir") }.to raise_error(
VagrantPlugins::CommunicatorWinRM::Errors::AuthError) VagrantPlugins::CommunicatorWinRM::Errors::AuthenticationFailed)
end end
it "should raise an execution error when an exception occurs" do it "should raise an execution error when an exception occurs" do

View File

@ -128,28 +128,6 @@ describe VagrantPlugins::Kernel_V2::VMConfig do
end end
end end
describe "#define" do
it "should allow regular names" do
subject.define "foo"
subject.finalize!
assert_valid
end
[
"foo [1]",
"bar {2}",
"foo/bar",
].each do |name|
it "should disallow names with brackets" do
subject.define name
subject.finalize!
assert_invalid
end
end
end
describe "#guest" do describe "#guest" do
it "is nil by default" do it "is nil by default" do
subject.finalize! subject.finalize!