]> git.somenet.org - pub/jan/aic18.git/blob - service-failover/main.py
Add online @ info + little polishing.
[pub/jan/aic18.git] / service-failover / main.py
1 #!/usr/bin/env python3
2
3 import docker
4 from pprint import pprint
5 import os
6 import requests
7 import sys
8 import signal
9 import time
10 import threading
11
12 # thread safe print
13 from tsprint import print
14
15
16
# Watched services, keyed by service name. Each entry is a list of:
#   [0] health-check URL (a check passes only on HTTP 200)
#   [1] initial delay in seconds (also used as the wait after a restart)
#   [2] delay in seconds between checks while the service is healthy
#   [3] Docker container name used when restarting the service
services = {
    "analysis": ["http://analysis:8081/health", 30, 15, "analysis"],
    "camunda": ["http://camunda:8085/engine-rest/process-instance/", 300, 15, "camunda"],
    "reporting": ["http://reporting:8083/health", 30, 15, "reporting"],
    "twitter": ["http://twitter:8084/actuator/health", 60, 15, "twitter"],
    "website": ["http://website:8080/", 30, 15, "website"],
}


# Docker client handle; assigned in the __main__ section before any watcher runs.
docker_ctl = None
28
def check_service(servicename):
    """Probe one service's health URL and schedule the next probe.

    The service entry is looked up in the module-level ``services`` table.
    A check passes only when the HTTP GET completes within 3 seconds and
    returns status 200. On failure the service is restarted through
    ``reset_service`` and the next probe is scheduled after the entry's
    initial delay (index 1); on success the next probe is scheduled after
    the regular delay (index 2).

    Args:
        servicename: Key into ``services``.

    Raises:
        KeyError: If ``servicename`` is not present in ``services``.
    """
    entry = services[servicename]

    # Only the probe itself is guarded, so a failure while printing or
    # scheduling cannot be mistaken for an unhealthy service.
    try:
        response = requests.get(entry[0], timeout=3)
        if response.status_code != 200:
            raise Exception("errorcode != 200: " + str(response.status_code))
    except Exception as exc:
        print("- " + servicename + " -- looks bad :( "
              + type(exc).__name__ + ": " + str(exc))
        reset_service(servicename)
        # After a restart, wait the startup delay before probing again.
        next_delay = entry[1]
    else:
        print("+ " + servicename + " -- looks good :)")
        next_delay = entry[2]

    # schedule next check
    th = threading.Timer(next_delay, check_service, args=[servicename])
    th.name = 'failover_' + servicename
    th.start()
51
52
53
def reset_service(servicename):
    """Restart the Docker container backing ``servicename`` (best effort).

    The container name is taken from index 3 of the service's entry in the
    module-level ``services`` table and restarted through the module-level
    ``docker_ctl`` client. Any failure is reported on stdout and otherwise
    ignored, so the caller can keep scheduling health checks.

    Args:
        servicename: Key into ``services``.
    """
    print("  " + servicename + " -- restarting service")
    try:
        # timeout=1 is handed to the Docker restart call unchanged; per the
        # Docker SDK it is the number of seconds to wait for the container
        # to stop before it is killed.
        docker_ctl.api.restart(services[servicename][3], timeout=1)
    except Exception as exc:
        print("! Docker said NO/service restart failed. "
              + type(exc).__name__ + ": " + str(exc))
62
63
64
if __name__ == "__main__":
    def signal_handler(signum, frame):
        """Terminate the whole process on SIGINT."""
        print('SIG received. exiting!')
        # os._exit ends the process immediately instead of waiting for the
        # pending (non-daemon) Timer threads to fire.
        os._exit(1)
    signal.signal(signal.SIGINT, signal_handler)

    # Connect to Docker using the environment's configuration and dump the
    # daemon info once at startup.
    docker_ctl = docker.from_env()
    print(docker_ctl.info())

    print()
    print("####################")
    print("# Failover service #")
    print("####################")

    pprint(services)
    print()

    print("Starting watchers")

    # One timer per service; the first check fires after the service's
    # initial delay, and check_service re-arms itself from then on.
    for servicename, service in services.items():
        th = threading.Timer(service[1], check_service, args=[servicename])
        th.name = 'failover_' + servicename
        th.start()