]> git.somenet.org - pub/jan/mattermost.git/blob - rss_to_channel/main.py
[somebot] requirements.txt for all modules.
[pub/jan/mattermost.git] / rss_to_channel / main.py
1 #!/usr/bin/env python3
2 # Someone's Mattermost scripts.
3 #  Copyright (c) 2016-2020 by Someone <someone@somenet.org> (aka. Jan Vales <jan@jvales.net>)
4 #  published under MIT-License
5 #
6 # spam channel
7 #
8
9 import traceback
10 import urllib.parse
11 import json
12 import random
13 from html.parser import HTMLParser
14
15 import feedparser
16 feedparser.PREFERRED_XML_PARSERS.remove("drv_libxml2")
17
18 import mattermost
19
20 import config
21
22
class MLStripper(HTMLParser):
    """HTML parser that throws away markup and keeps only the text content.

    Feed it HTML with ``feed()`` and read the accumulated text back with
    ``get_data()``. Character references are converted to the characters
    they stand for before they reach ``handle_data``.
    """

    def __init__(self):
        # Let the base class set up all of its parser state instead of
        # replicating part of it by hand; it calls reset() itself.
        # The former ``strict`` attribute is gone: HTMLParser dropped that
        # option in Python 3.5 and nothing reads it.
        super().__init__(convert_charrefs=True)
        # Text fragments in the order the parser reported them.
        self.fed = []

    def handle_data(self, d):
        """Record one run of text found between tags."""
        self.fed.append(d)

    def get_data(self):
        """Return all text collected so far as a single string."""
        return "".join(self.fed)
33
def strip_tags(html):
    """Return the text content of ``html`` with all markup removed.

    Args:
        html: String containing HTML (or plain text).

    Returns:
        The concatenated text found outside of tags.
    """
    s = MLStripper()
    s.feed(html)
    # The parser holds back trailing text that might still turn into a
    # character reference (e.g. the "&T" in "AT&T"). close() signals end of
    # input so that buffered text is emitted instead of being lost.
    s.close()
    return s.get_data()
38
39
def processFeed(mm, feed):
    """Post every not-yet-seen entry of one configured feed.

    Already-seen entries are remembered in ``data/rss.<feed>.json`` as a
    mapping from ``entry.link + entry.title`` to a marker that is only
    meaningful within a single run:

    * ``0`` - known from an earlier run, not in the feed this time
    * ``1`` - known from an earlier run and still in the feed
    * ``2`` - new in this run and posted

    ``config.mm_rss_config[feed]`` is read positionally: ``[0]`` is handed
    to ``feedparser.parse``, ``[1]`` is iterated for the channel ids to
    post to, ``[2]`` is put in front of the entry title in the heading and
    a truthy ``[3]`` runs the summary through ``strip_tags``.

    The state file is written even when processing fails part-way, so
    entries that were already posted are not posted again on the next run.

    Args:
        mm: API client; only ``create_post(channel_id, message)`` is used.
        feed: Key into ``config.mm_rss_config``; also part of the state
            file name.

    Raises:
        Exception: Errors from parsing, posting or writing the state file
            propagate to the caller.
    """
    feed_cfg = config.mm_rss_config[feed]
    state_path = "data/rss." + feed + ".json"

    try:
        with open(state_path, encoding="utf-8") as data_file:
            rssdata = json.load(data_file)
    except (OSError, ValueError):
        # Missing, unreadable or corrupt state: start from scratch
        # (best effort, same as a first run).
        rssdata = {}

    # Is still present? Assume "no" until the entry shows up in the feed.
    rssdata = dict.fromkeys(rssdata, 0)

    try:
        for entry in feedparser.parse(feed_cfg[0]).entries:
            key = entry.link + entry.title
            if key in rssdata:
                rssdata[key] = 1
                continue

            # Append a "mattermost" marker to the link's query string.
            split = urllib.parse.urlsplit(entry.link)
            qry = split.query + "&mattermost" if split.query else "mattermost"
            url = urllib.parse.urlunsplit(split._replace(query=qry))

            published = entry.get("published")
            date = "(" + published + ") " if published else ""

            # Entries without a summary are still posted (heading and link).
            summary = entry.get("summary", "")
            if feed_cfg[3]:
                summary = strip_tags(summary)

            msg = ("### " + feed_cfg[2] + " ``" + entry.title + "``\n"
                   + date + url + "\n\n----\n\n" + summary)
            for chanid in feed_cfg[1]:
                print("Posting to channel:" + str(mm.create_post(chanid, msg)))

            # Mark as seen only after every channel got the post, so a
            # failed post is retried on the next run.
            rssdata[key] = 2

        # Forget entries that left the feed, but only with a 1 in 20000
        # chance per run, so most vanished entries stay remembered.
        # NOTE(review): presumably this keeps entries that temporarily
        # drop out of the feed from being re-posted - confirm.
        for key in list(rssdata):
            if rssdata[key] == 0 and random.randrange(20000) == 0:
                del rssdata[key]
    finally:
        with open(state_path, "w", encoding="utf-8") as state_file:
            json.dump(rssdata, state_file)
82
83
84
def main(mm):
    """Process every feed listed in ``config.mm_rss_config``.

    A failure in one feed is printed as a traceback and does not stop the
    remaining feeds from being processed.

    Args:
        mm: API client that is passed through to ``processFeed``.
    """
    for feed in config.mm_rss_config:
        try:
            processFeed(mm, feed)
        except Exception:
            # Only ordinary errors are contained here; KeyboardInterrupt
            # and SystemExit still terminate the run.
            traceback.print_exc()
91
92
93
if __name__ == "__main__":
    # Log in, process all configured feeds once, then log out again.
    mm = mattermost.MMApi(config.mm_api_url)
    mm.login(config.mm_user, config.mm_user_pw)

    try:
        main(mm)
    finally:
        # Always end the session, even if main() is interrupted.
        mm.logout()