Merge branch '48-create-failover-service' into 'master'
authorSebastian Steiner <e1029038@student.tuwien.ac.at>
Sun, 20 Jan 2019 16:43:42 +0000 (17:43 +0100)
committerSebastian Steiner <e1029038@student.tuwien.ac.at>
Sun, 20 Jan 2019 16:43:42 +0000 (17:43 +0100)
Resolve "Create failover service"

Closes #48

See merge request aic18/G6T4!45

g6t4.yml
service-failover/Dockerfile [new file with mode: 0644]
service-failover/README.md [new file with mode: 0644]
service-failover/main.py [new file with mode: 0644]
service-failover/requirements.txt [new file with mode: 0644]
service-failover/tsprint.py [new file with mode: 0644]
service-fallback/Dockerfile [deleted file]
service-fallback/README.md [deleted file]

index 9bfe93f327764a1aab91c8e777f4cc1ee3c54d63..52e351fe6c05e37a17af6cc3db0e69f0e14790b1 100644 (file)
--- a/g6t4.yml
+++ b/g6t4.yml
@@ -4,11 +4,10 @@ services:
     build: ./camunda-overlay
     container_name: camunda
     ports:
+#    restart: always
       - "8085:8085"
-    restart: always
     depends_on:
       - service-analysis
-      - service-fallback
       - service-reporting
       - service-twitter
 
@@ -17,27 +16,28 @@ services:
     container_name: analysis
     ports:
       - "8081:8081"
-    restart: always
+#    restart: always
 
-  service-fallback:
-    build: ./service-fallback
-    container_name: fallback
-    ports:
-      - "8082:8082"
+  service-failover:
+    build: ./service-failover
+    container_name: failover
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock
+    restart: always
 
   service-reporting:
     build: ./service-reporting
     container_name: reporting
     ports:
       - "8083:8083"
-    restart: always
+#    restart: always
 
   service-twitter:
     build: ./service-twitter
     container_name: twitter
     ports:
       - "8084:8084"
-    restart: always
+#    restart: always
 
   service-website:
     build: ./service-website
diff --git a/service-failover/Dockerfile b/service-failover/Dockerfile
new file mode 100644 (file)
index 0000000..faa1607
--- /dev/null
@@ -0,0 +1,12 @@
+FROM python:3.7-slim
+LABEL maintainer="Jan Vales <jan.vales@tuwien.ac.at>"
+
+COPY . /app/
+
+WORKDIR /app
+RUN ["pip", "install", "-r", "requirements.txt"]
+
+# Container must run as root (presumably to access the mounted /var/run/docker.sock). :/
+#RUN ["useradd", "--no-create-home", "failover"]
+#USER failover:failover
+ENTRYPOINT ["python3.7", "-u", "./main.py"]
diff --git a/service-failover/README.md b/service-failover/README.md
new file mode 100644 (file)
index 0000000..f9fe8a9
--- /dev/null
@@ -0,0 +1,5 @@
+# Failover service
+
+This service checks for dead or hanging services and restarts them.
+
+This service provides no API and is the only one that uses the `restart: always` docker-compose option.
diff --git a/service-failover/main.py b/service-failover/main.py
new file mode 100644 (file)
index 0000000..2f90406
--- /dev/null
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+
+import docker
+from pprint import pprint
+import os
+import requests
+import sys
+import signal
+import time
+import threading
+
+# thread safe print
+from tsprint import print
+
+
+
+services = {  # name: [check url, initial delay (s), delay between checks (s), dockername]
+    "analysis":  ["http://analysis:8081/health",                       30,  15, "analysis"],
+    "camunda":   ["http://camunda:8085/engine-rest/process-instance/", 300, 15, "camunda"],
+    "reporting": ["http://reporting:8083/health",                      30,  15, "reporting"],
+    "twitter":   ["http://twitter:8084/actuator/health",               60,  15, "twitter"],
+    "website":   ["http://website:8080/",                              30,  15, "website"],
+}
+
+
+
+docker_ctl = None
+
+def check_service(servicename):
+    """Probe the service's check URL once, restart it on failure, then re-arm the timer."""
+    try:
+        r = requests.get(services[servicename][0], timeout=3)
+        if r.status_code != 200:
+            # str(), not int(): str + int raised TypeError and hid the real status
+            raise Exception("errorcode != 200: "+str(r.status_code))
+
+        print("+ "+servicename+" -- looks good :)")
+        # healthy: next check after the regular delay
+        next_delay = services[servicename][2]
+    except Exception as exc:
+        # repr() keeps the exception message (e.g. the status code) in the log
+        print("- "+servicename+" -- looks bad :( "+repr(exc))
+        reset_service(servicename)
+
+        # restarted: next check after the startup delay
+        next_delay = services[servicename][1]
+
+    th = threading.Timer(next_delay, check_service, args=[servicename])
+    th.setName('failover_'+servicename)
+    th.start()
+
+
+
+def reset_service(servicename):
+    """Restart the container mapped to *servicename*; a failed restart is logged, not raised."""
+    print("  "+servicename+" -- restarting service")
+    try:
+        restart = docker_ctl.containers.client.api.restart
+        restart(services[servicename][3], timeout=1)
+    except BaseException:
+        print("! Docker said NO/service restart failed."+str(sys.exc_info()[0]))
+
+
+
+if __name__ == "__main__":
+    def signal_handler(signum, frame):
+        # os._exit: hard exit without waiting for the watcher timer threads
+        print('SIG received. exitting!')
+        os._exit(1)
+    # SIGTERM is what `docker stop` sends; as the container's PID 1 it is ignored unless handled
+    for sig in (signal.SIGINT, signal.SIGTERM):
+        signal.signal(sig, signal_handler)
+
+    docker_ctl = docker.from_env()
+    print(docker_ctl.info())
+    print()
+    print("####################")
+    print("# Failover service #")
+    print("####################")
+    pprint(services)
+    print()
+    print("Starting watchers")
+    # the first probe of each service waits for that service's initial delay
+    for servicename, service in services.items():
+        th = threading.Timer(service[1], check_service, args=[servicename])
+        th.setName('failover_'+servicename)
+        th.start()
diff --git a/service-failover/requirements.txt b/service-failover/requirements.txt
new file mode 100644 (file)
index 0000000..2f54570
--- /dev/null
@@ -0,0 +1,2 @@
+docker
+requests
diff --git a/service-failover/tsprint.py b/service-failover/tsprint.py
new file mode 100644 (file)
index 0000000..72839e7
--- /dev/null
@@ -0,0 +1,12 @@
+#!/usr/bin/env python3
+
+# make print threadsafe.
+
+from threading import Lock
+
+printlock = Lock()
+oldprint = print  # the builtin, captured before this module rebinds the name
+def print(*args, **kwargs):
+    """Call the builtin print while holding printlock (one print at a time)."""
+    with printlock:
+        oldprint(*args, **kwargs)
diff --git a/service-fallback/Dockerfile b/service-fallback/Dockerfile
deleted file mode 100644 (file)
index b09b037..0000000
+++ /dev/null
@@ -1 +0,0 @@
-FROM alpine:latest
diff --git a/service-fallback/README.md b/service-fallback/README.md
deleted file mode 100644 (file)
index d3a44f8..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-# Fallback service
-
-tbd
\ No newline at end of file