add a very long (one-year) dask-worker death timeout (and fix the `db` import bug)

This makes workers stick around even if they lose their connection to the scheduler.

verizon is unreliable ;w;
This commit is contained in:
xenia 2021-08-02 23:31:45 -04:00
parent aec8ce34a7
commit 76b55b149c
2 changed files with 6 additions and 2 deletions

View File

@ -4,7 +4,8 @@ import json
import os
import sys
from leylines import db, SERVER_NODE_ID
from leylines import SERVER_NODE_ID
from leylines.database import Database
XDG_CACHE_HOME = os.path.expanduser(os.environ.get("XDG_CACHE_HOME", "~/.cache"))
@ -13,6 +14,9 @@ if not os.path.isdir(CACHE_DIR):
os.mkdir(CACHE_DIR, 0o700)
db = Database()
def get_ansible_config():
server_node = db.get_server_node()
nodes = [node for node in db.get_nodes() if node.ssh_key is not None]

View File

@ -5,7 +5,7 @@ Description=Dask worker for leylines
Type=simple
Environment=PATH=%h/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
Environment=LD_LIBRARY_PATH=%h/.local/lib:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ExecStart=/bin/bash -c 'source dask-venv/bin/activate && exec dask-worker --dashboard-address {{leylines_ip}}:31336 --host {{leylines_ip}} --protocol tcp --nthreads 1 --nprocs auto --name {{inventory_hostname}} --local-directory $CACHE_DIRECTORY --resources "{{leylines_resources}}" {{leylines_server_addr}}:31337'
ExecStart=/bin/bash -c 'source dask-venv/bin/activate && exec dask-worker --dashboard-address {{leylines_ip}}:31336 --host {{leylines_ip}} --protocol tcp --nthreads 1 --nprocs auto --name {{inventory_hostname}} --local-directory $CACHE_DIRECTORY --resources "{{leylines_resources}}" --death-timeout 31536000 {{leylines_server_addr}}:31337'
WorkingDirectory=%h/dask
CacheDirectory=leylines-worker