Merge pull request #4943 from maxlinc/winrm_error_handling

Improved WinRM error handling (including better `ready?` and `wait_for_ready`)
This commit is contained in:
Shawn Neal 2015-02-16 10:16:14 -08:00
commit 92762eeb8c
8 changed files with 218 additions and 59 deletions

View File

@ -25,22 +25,89 @@ module VagrantPlugins
@logger.info("Initializing WinRMCommunicator")
end
def wait_for_ready(timeout)
Timeout.timeout(timeout) do
# Wait for winrm_info to be ready
winrm_info = nil
while true
winrm_info = Helper.winrm_info(@machine)
break if winrm_info
sleep 0.5
end
# Got it! Let the user know what we're connecting to.
@machine.ui.detail("WinRM address: #{shell.host}:#{shell.port}")
@machine.ui.detail("WinRM username: #{shell.username}")
@machine.ui.detail("WinRM transport: #{shell.config.transport}")
last_message = nil
last_message_repeat_at = 0
while true
message = nil
begin
begin
return true if ready?
rescue Vagrant::Errors::VagrantError => e
@logger.info("WinRM not ready: #{e.inspect}")
raise
end
rescue Errors::ConnectionTimeout
message = "Connection timeout."
rescue Errors::AuthenticationFailed
message = "Authentication failure."
rescue Errors::Disconnected
message = "Remote connection disconnect."
rescue Errors::ConnectionRefused
message = "Connection refused."
rescue Errors::ConnectionReset
message = "Connection reset."
rescue Errors::HostDown
message = "Host appears down."
rescue Errors::NoRoute
message = "Host unreachable."
rescue Errors::TransientError => e
# Any other retriable errors
message = e.message
end
# If we have a message to show, then show it. We don't show
# repeated messages unless they've been repeating longer than
# 10 seconds.
if message
message_at = Time.now.to_f
show_message = true
if last_message == message
show_message = (message_at - last_message_repeat_at) > 10.0
end
if show_message
@machine.ui.detail("Warning: #{message} Retrying...")
last_message = message
last_message_repeat_at = message_at
end
end
end
end
rescue Timeout::Error
return false
end
def ready?
@logger.info("Checking whether WinRM is ready...")
Timeout.timeout(@machine.config.winrm.timeout) do
result = Timeout.timeout(@machine.config.winrm.timeout) do
shell(true).powershell("hostname")
end
@logger.info("WinRM is ready!")
return true
rescue Vagrant::Errors::VagrantError => e
# We catch a `VagrantError` which would signal that something went
# wrong expectedly in the `connect`, which means we didn't connect.
rescue Errors::TransientError => e
# We catch a `TransientError` which would signal that something went
# that might work if we wait and retry.
@logger.info("WinRM not up: #{e.inspect}")
# We reset the shell to trigger calling of winrm_finder again.
# This resolves a problem when using vSphere where the ssh_info was not refreshing
# This resolves a problem when using vSphere where the winrm_info was not refreshing
# thus never getting the correct hostname.
@shell = nil
return false

View File

@ -7,6 +7,7 @@ module VagrantPlugins
attr_accessor :port
attr_accessor :guest_port
attr_accessor :max_tries
attr_accessor :retry_delay
attr_accessor :timeout
attr_accessor :transport
attr_accessor :ssl_peer_verification
@ -18,6 +19,7 @@ module VagrantPlugins
@port = UNSET_VALUE
@guest_port = UNSET_VALUE
@max_tries = UNSET_VALUE
@retry_delay = UNSET_VALUE
@timeout = UNSET_VALUE
@transport = UNSET_VALUE
@ssl_peer_verification = UNSET_VALUE
@ -31,20 +33,22 @@ module VagrantPlugins
is_ssl = @transport == :ssl
@port = (is_ssl ? 5986 : 5985) if @port == UNSET_VALUE
@guest_port = (is_ssl ? 5986 : 5985) if @guest_port == UNSET_VALUE
@max_tries = 20 if @max_tries == UNSET_VALUE
@timeout = 1800 if @timeout == UNSET_VALUE
@max_tries = 20 if @max_tries == UNSET_VALUE
@retry_delay = 2 if @retry_delay == UNSET_VALUE
@timeout = 1800 if @timeout == UNSET_VALUE
@ssl_peer_verification = true if @ssl_peer_verification == UNSET_VALUE
end
def validate(machine)
errors = []
errors << "winrm.username cannot be nil." if @username.nil?
errors << "winrm.password cannot be nil." if @password.nil?
errors << "winrm.port cannot be nil." if @port.nil?
errors << "winrm.guest_port cannot be nil." if @guest_port.nil?
errors << "winrm.max_tries cannot be nil." if @max_tries.nil?
errors << "winrm.timeout cannot be nil." if @timeout.nil?
errors << "winrm.username cannot be nil." if @username.nil?
errors << "winrm.password cannot be nil." if @password.nil?
errors << "winrm.port cannot be nil." if @port.nil?
errors << "winrm.guest_port cannot be nil." if @guest_port.nil?
errors << "winrm.max_tries cannot be nil." if @max_tries.nil?
errors << "winrm.retry_delay cannot be nil." if @max_tries.nil?
errors << "winrm.timeout cannot be nil." if @timeout.nil?
unless @ssl_peer_verification == true || @ssl_peer_verification == false
errors << "winrm.ssl_peer_verification must be a boolean."
end

View File

@ -6,8 +6,11 @@ module VagrantPlugins
error_namespace("vagrant_winrm.errors")
end
class AuthError < WinRMError
error_key(:auth_error)
class TransientError < WinRMError
end
class AuthenticationFailed < WinRMError
error_key(:authentication_failed)
end
class ExecutionError < WinRMError
@ -29,6 +32,38 @@ module VagrantPlugins
class WinRMFileTransferError < WinRMError
error_key(:winrm_file_transfer_error)
end
class InvalidTransport < WinRMError
error_key(:invalid_transport)
end
class SSLError < WinRMError
error_key(:ssl_error)
end
class ConnectionTimeout < TransientError
error_key(:connection_timeout)
end
class Disconnected < TransientError
error_key(:disconnected)
end
class ConnectionRefused < TransientError
error_key(:connection_refused)
end
class ConnectionReset < TransientError
error_key(:connection_reset)
end
class HostDown < TransientError
error_key(:host_down)
end
class NoRoute < TransientError
error_key(:no_route)
end
end
end
end

View File

@ -22,6 +22,7 @@ module VagrantPlugins
@@exceptions_to_retry_on = [
HTTPClient::KeepAliveDisconnected,
WinRM::WinRMHTTPTransportError,
WinRM::WinRMAuthorizationError,
Errno::EACCES,
Errno::EADDRINUSE,
Errno::ECONNREFUSED,
@ -87,7 +88,7 @@ module VagrantPlugins
end
def execute_shell_with_retry(command, shell, &block)
retryable(tries: @config.max_tries, on: @@exceptions_to_retry_on, sleep: 10) do
retryable(tries: @config.max_tries, on: @@exceptions_to_retry_on, sleep: @config.retry_delay) do
@logger.debug("#{shell} executing:\n#{command}")
output = session.send(shell, command) do |out, err|
block.call(:stdout, out) if block_given? && out
@ -114,20 +115,42 @@ module VagrantPlugins
end
end
def raise_winrm_exception(winrm_exception, shell, command)
# If the error is a 401, we can return a more specific error message
if winrm_exception.message.include?("401")
raise Errors::AuthError,
user: @username,
password: @password,
endpoint: endpoint,
message: winrm_exception.message
def raise_winrm_exception(exception, shell = nil, command = nil)
case exception
when WinRM::WinRMAuthorizationError
raise Errors::AuthenticationFailed,
user: @config.username,
password: @config.password,
endpoint: endpoint,
message: exception.message
when WinRM::WinRMHTTPTransportError
raise Errors::ExecutionError,
shell: shell,
command: command,
message: exception.message
when OpenSSL::SSL::SSLError
raise Errors::SSLError, message: exception.message
when HTTPClient::TimeoutError
raise Errors::ConnectionTimeout, message: exception.message
when Errno::ECONNREFUSED
# This is raised if we failed to connect the max amount of times
raise Errors::ConnectionRefused
when Errno::ECONNRESET
# This is raised if we failed to connect the max number of times
# due to an ECONNRESET.
raise Errors::ConnectionReset
when Errno::EHOSTDOWN
# This is raised if we get an ICMP DestinationUnknown error.
raise Errors::HostDown
when Errno::EHOSTUNREACH
# This is raised if we can't work out how to route traffic.
raise Errors::NoRoute
else
raise Errors::ExecutionError,
shell: shell,
command: command,
message: exception.message
end
raise Errors::ExecutionError,
shell: shell,
command: command,
message: winrm_exception.message
end
def new_session

View File

@ -1,11 +1,10 @@
en:
vagrant_winrm:
errors:
auth_error: |-
authentication_failed: |-
An authorization error occurred while connecting to WinRM.
User: %{user}
Password: %{password}
Endpoint: %{endpoint}
Message: %{message}
winrm_bad_exit_status: |-
@ -29,6 +28,15 @@ en:
Message: %{message}
invalid_shell: |-
%{shell} is not a supported type of Windows shell.
invalid_transport: |-
%{transport} is not a supported WinRM transport.
ssl_error: |-
An SSL error occurred while connecting to WinRM. This usually
occurs when you are using a self-signed certificate and have
not set the WinRM `ssl_peer_verification` config setting to false.
Message: %{message}
winrm_not_ready: |-
The box is not able to report an address for WinRM to connect to yet.
WinRM cannot access this Vagrant environment. Please wait for the
@ -39,3 +47,36 @@ en:
From: %{from}
To: %{to}
Message: %{message}
connection_refused: |-
WinRM connection was refused! This usually happens if the VM failed to
boot properly. Some steps to try to fix this: First, try reloading your
VM with `vagrant reload`, since a simple restart sometimes fixes things.
If that doesn't work, destroy your VM and recreate it with a `vagrant destroy`
followed by a `vagrant up`. If that doesn't work, contact a Vagrant
maintainer (support channels listed on the website) for more assistance.
connection_reset: |-
WinRM connection was reset! This usually happens when the machine is
taking too long to reboot. First, try reloading your machine with
`vagrant reload`, since a simple restart sometimes fixes things.
If that doesn't work, destroy your machine and recreate it with
a `vagrant destroy` followed by a `vagrant up`. If that doesn't work,
contact support.
connection_timeout: |-
Vagrant timed out while attempting to connect via WinRM. This usually
means that the VM booted, but there are issues with the WinRM configuration
or network connectivity issues. Please try to `vagrant reload` or
`vagrant up` again.
disconnected: |-
The WinRM connection was unexpectedly closed by the remote end. This
usually indicates that WinRM within the guest machine was unable to
properly start up. Please boot the VM in GUI mode to check whether
it is booting properly.
no_route: |-
While attempting to connect with WinRM, a "no route to host" (EHOSTUNREACH)
error was received. Please verify your network settings are correct
and try again.
host_down: |-
While attempting to connect with WinRM, a "host is down" (EHOSTDOWN)
error was received. Please verify your WinRM settings are correct
and try again.

View File

@ -28,11 +28,16 @@ describe VagrantPlugins::CommunicatorWinRM::Communicator do
expect(subject.ready?).to be_true
end
it "returns false if hostname command fails to execute without error" do
expect(shell).to receive(:powershell).with("hostname").and_raise(Vagrant::Errors::VagrantError)
it "returns false if hostname command fails with a transient error" do
expect(shell).to receive(:powershell).with("hostname").and_raise(VagrantPlugins::CommunicatorWinRM::Errors::TransientError)
expect(subject.ready?).to be_false
end
it "raises an error if hostname command fails with an unknown error" do
expect(shell).to receive(:powershell).with("hostname").and_raise(Vagrant::Errors::VagrantError)
expect { subject.ready? }.to raise_error(Vagrant::Errors::VagrantError)
end
it "raises timeout error when hostname command takes longer then winrm timeout" do
expect(shell).to receive(:powershell).with("hostname") do
sleep 2 # winrm.timeout = 1

View File

@ -12,6 +12,8 @@ describe VagrantPlugins::CommunicatorWinRM::WinRMShell do
VagrantPlugins::CommunicatorWinRM::Config.new.tap do |c|
c.username = 'username'
c.password = 'password'
c.max_tries = 3
c.retry_delay = 0
c.finalize!
end
}
@ -28,11 +30,15 @@ describe VagrantPlugins::CommunicatorWinRM::WinRMShell do
expect(subject.powershell("dir")[:exitcode]).to eq(0)
end
it "should raise auth error when exception message contains 401" do
expect(session).to receive(:powershell).with(/^dir.+/).and_raise(
StandardError.new("Oh no! a 401 SOAP error!"))
it "should retry when a WinRMAuthorizationError is received" do
expect(session).to receive(:powershell).with(/^dir.+/).exactly(3).times.and_raise(
# Note: The initialize for WinRMAuthorizationError may require a status_code as
# the second argument in a future WinRM release. Currently it doesn't track the
# status code.
WinRM::WinRMAuthorizationError.new("Oh no!! Unauthrorized")
)
expect { subject.powershell("dir") }.to raise_error(
VagrantPlugins::CommunicatorWinRM::Errors::AuthError)
VagrantPlugins::CommunicatorWinRM::Errors::AuthenticationFailed)
end
it "should raise an execution error when an exception occurs" do

View File

@ -128,28 +128,6 @@ describe VagrantPlugins::Kernel_V2::VMConfig do
end
end
describe "#define" do
it "should allow regular names" do
subject.define "foo"
subject.finalize!
assert_valid
end
[
"foo [1]",
"bar {2}",
"foo/bar",
].each do |name|
it "should disallow names with brackets" do
subject.define name
subject.finalize!
assert_invalid
end
end
end
describe "#guest" do
it "is nil by default" do
subject.finalize!