From 54c3a8660fb88da7d06151ef2129c654cee8b2b3 Mon Sep 17 00:00:00 2001 From: Jan Vales Date: Sun, 20 Jan 2019 04:07:26 +0100 Subject: [PATCH] code cleanup and fixes. --- g6t4.yml | 10 +++++----- service-failover/Dockerfile | 3 ++- service-failover/README.md | 4 ++-- service-failover/main.py | 35 ++++++++++++++++++++--------------- 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/g6t4.yml b/g6t4.yml index 6786362..7d05546 100644 --- a/g6t4.yml +++ b/g6t4.yml @@ -5,7 +5,7 @@ services: container_name: camunda ports: - "8085:8080" - restart: always +# restart: always depends_on: - service-analysis - service-reporting @@ -16,7 +16,7 @@ services: container_name: analysis ports: - "8081:8081" - restart: always +# restart: always service-failover: build: ./service-failover @@ -30,20 +30,20 @@ services: container_name: reporting ports: - "8083:8083" - restart: always +# restart: always service-twitter: build: ./service-twitter container_name: twitter ports: - "8084:8084" - restart: always +# restart: always service-website: build: ./service-website container_name: website ports: - "8080:8080" - restart: always +# restart: always depends_on: - camunda diff --git a/service-failover/Dockerfile b/service-failover/Dockerfile index 577c3ef..faa1607 100644 --- a/service-failover/Dockerfile +++ b/service-failover/Dockerfile @@ -6,6 +6,7 @@ COPY . /app/ WORKDIR /app RUN ["pip", "install", "-r", "requirements.txt"] -RUN ["useradd", "--no-create-home", "failover"] +# Container must run as root. :/ +#RUN ["useradd", "--no-create-home", "failover"] #USER failover:failover ENTRYPOINT ["python3.7", "-u", "./main.py"] diff --git a/service-failover/README.md b/service-failover/README.md index 89c9422..f9fe8a9 100644 --- a/service-failover/README.md +++ b/service-failover/README.md @@ -1,5 +1,5 @@ # Fallback service -Periodically checks wether all services are in a healthy state. +This service checks for dead or hanging services and restarts them. -If some service fails, this service will restart it. +This service provides no API and is the only one using the restart:always docker-compose config option. diff --git a/service-failover/main.py b/service-failover/main.py index 4942db9..46757d1 100644 --- a/service-failover/main.py +++ b/service-failover/main.py @@ -13,13 +13,14 @@ import threading from tsprint import print + services = dict() # Service, check url, initial delay, timeout, dockername -services["analysis"] = ["http://analysis:8081/health", 30, 15, "analysis"] -services["camunda"] = ["http://camunda:8085/health", 300, 15, "camunda"] -services["reporting"] = ["http://reporting:8083/health", 30, 15, "reporting"] -services["twitter"] = ["http://twitter:8084/health", 30, 15, "twitter"] -services["website"] = ["http://website:8080/health", 30, 15, "website"] +services["analysis"] = ["http://analysis:8081/health", 30, 15, "analysis"] +services["camunda"] = ["http://camunda:8085/health", 300, 15, "camunda"] +services["reporting"] = ["http://reporting:8083/health", 30, 15, "reporting"] +services["twitter"] = ["http://twitter:8084/actuator/health", 30, 15, "twitter"] +services["website"] = ["http://website:8080/", 30, 15, "website"] @@ -32,20 +33,18 @@ def check_service(servicename): r = requests.get(services[servicename][0], timeout=3) if r.status_code != 200: raise Exception("errorcode != 200: "+int(r.status_code)) + print("+ "+servicename+" -- looks good :)") - # schedule next check. + # schedule next check th = threading.Timer(services[servicename][2], check_service, args=[servicename]) th.setName('failover_'+servicename) th.start() except: print("- "+servicename+" -- looks bad :( "+str(sys.exc_info()[0])) - try: - reset_service(servicename) - except: - print("! Docker said NO/service restart failed."+str(sys.exc_info()[0])) + reset_service(servicename) - # schedule next check using startup delay. + # schedule next check using startup delay th = threading.Timer(services[servicename][1], check_service, args=[servicename]) th.setName('failover_'+servicename) th.start() @@ -55,9 +54,11 @@ def check_service(servicename): def reset_service(servicename): global services global docker_ctl - print(" "+servicename+" -- resetting service") - docker_ctl.containers.client.api.kill(services[servicename][3]) - docker_ctl.containers.client.api.restart(services[servicename][3]) + print(" "+servicename+" -- restarting service") + try: + docker_ctl.containers.client.api.restart(services[servicename][3], timeout=1) + except: + print("! Docker said NO/service restart failed."+str(sys.exc_info()[0])) @@ -75,8 +76,12 @@ if __name__ == "__main__": print("# Failover service #") print("####################") + pprint(services) + print() + + print("Starting watchers") + for servicename, service in services.items(): - print(" "+servicename+" -- postponing initial check for "+str(service[1])+" sec") th = threading.Timer(service[1], check_service, args=[servicename]) th.setName('failover_'+servicename) th.start() -- 2.43.0