]> git.somenet.org - pub/jan/aic18.git/blob - service-failover/main.py
a working failover service, with broken /health check url configuration.
[pub/jan/aic18.git] / service-failover / main.py
1 #!/usr/bin/env python3
2
3 import docker
4 from pprint import pprint
5 import os
6 import requests
7 import sys
8 import signal
9 import time
10 import threading
11
12 # thread safe print
13 from tsprint import print
14
15
# Per-service failover configuration, keyed by service name.
# Each entry is a list of:
#   [0] health check url
#   [1] delay in seconds before the first check; also used as the delay
#       before re-checking after a failed check
#   [2] delay in seconds between checks while the service looks healthy
#   [3] docker container name passed to kill/restart
services = {
    "analysis":  ["http://analysis:8081/health",  30,  15, "analysis"],
    "camunda":   ["http://camunda:8085/health",   300, 15, "camunda"],
    "reporting": ["http://reporting:8083/health", 30,  15, "reporting"],
    "twitter":   ["http://twitter:8084/health",   30,  15, "twitter"],
    "website":   ["http://website:8080/health",   30,  15, "website"],
}



# Docker client handle; assigned in the __main__ block before any check runs.
docker_ctl = None
27
def check_service(servicename):
    """Probe one service's health url and schedule the next probe.

    A GET request is sent to the service's check url. Any response other
    than HTTP 200, or any request error, counts as a failure: the service
    is reset via reset_service() and the next probe is scheduled after the
    service's startup delay. On success the next probe is scheduled after
    the regular check interval.

    Args:
        servicename: key into the module-level ``services`` dict.

    Raises:
        KeyError: if ``servicename`` is not configured in ``services``.
    """
    check_url, failure_delay, success_delay = services[servicename][:3]

    try:
        r = requests.get(check_url, timeout=3)
        if r.status_code != 200:
            # str(), not int(): concatenating an int to a str raises TypeError
            # and would hide the actual status code.
            raise Exception("errorcode != 200: "+str(r.status_code))
    except Exception as err:
        print("- "+servicename+" -- looks bad :( "+repr(err))
        try:
            reset_service(servicename)
        except Exception as reset_err:
            # Best effort: a failed reset must not stop future checks.
            print("! Docker said NO/service restart failed."+repr(reset_err))
        # Give the (hopefully) restarted service its startup delay again.
        next_delay = failure_delay
    else:
        print("+ "+servicename+" -- looks good :)")
        next_delay = success_delay

    # schedule next check.
    th = threading.Timer(next_delay, check_service, args=[servicename])
    th.name = 'failover_'+servicename
    th.start()
52
53
54
def reset_service(servicename):
    """Kill and restart the docker container backing a service.

    The container is killed first so a hung process goes away immediately.
    A failing kill (the docker API rejects killing a container that is not
    running) is logged and ignored so that a crashed container still gets
    restarted.

    Args:
        servicename: key into the module-level ``services`` dict.

    Raises:
        KeyError: if ``servicename`` is not configured in ``services``.
        docker.errors.APIError: if the restart itself is rejected.
    """
    print("  "+servicename+" -- resetting service")
    container = services[servicename][3]
    try:
        docker_ctl.api.kill(container)
    except docker.errors.APIError as err:
        # Not fatal: the restart below is what actually brings it back.
        print("  "+servicename+" -- kill failed, restarting anyway: "+repr(err))
    docker_ctl.api.restart(container)
61
62
63
if __name__ == "__main__":
    def signal_handler(signum, frame):
        """Exit the whole process at once on SIGINT.

        os._exit() is used so pending timer threads cannot keep the
        process alive.
        """
        print('SIG received. exitting!')
        os._exit(1)
    signal.signal(signal.SIGINT, signal_handler)

    # Connect to the docker daemon using the environment's settings.
    docker_ctl = docker.from_env()
    print(docker_ctl.info())

    print()
    print("####################")
    print("# Failover service #")
    print("####################")

    # Start one timer per service; each check re-schedules itself afterwards.
    for servicename, service in services.items():
        print("  "+servicename+" -- postponing initial check for "+str(service[1])+" sec")
        th = threading.Timer(service[1], check_service, args=[servicename])
        th.name = 'failover_'+servicename
        th.start()