From 82f405d1d3fb8ee4dca8073afc2cfa4096afe64f Mon Sep 17 00:00:00 2001
From: Someone
Date: Fri, 19 Jun 2020 01:32:13 +0200
Subject: [PATCH] REWORK [cron] VoWi-Magic happens here. Manage MM-Channels and
 notify of changed VoWi-Pages.

---
NOTE(review): mw_vowi/main.py and mw_vowi/config.py.example were revised during
review. The blob ids on their "index" lines are stale until this patch is
regenerated with git format-patch.

 mw_vowi/.gitignore        |   2 +
 mw_vowi/config.py.example |  23 +++
 mw_vowi/data/.gitkeep     |   0
 mw_vowi/main.py           | 412 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 437 insertions(+)
 create mode 100644 mw_vowi/.gitignore
 create mode 100644 mw_vowi/config.py.example
 create mode 100644 mw_vowi/data/.gitkeep
 create mode 100755 mw_vowi/main.py

diff --git a/mw_vowi/.gitignore b/mw_vowi/.gitignore
new file mode 100644
index 0000000..bbc97f1
--- /dev/null
+++ b/mw_vowi/.gitignore
@@ -0,0 +1,2 @@
+config.py
+data/**
diff --git a/mw_vowi/config.py.example b/mw_vowi/config.py.example
new file mode 100644
index 0000000..52de7b9
--- /dev/null
+++ b/mw_vowi/config.py.example
@@ -0,0 +1,23 @@
+#
+# Someone's Mattermost scripts.
+# Copyright (c) 2016-2020 by Someone (aka. Jan Vales )
+# published under MIT-License
+#
+# Example config file.
+# mv config.py.example config.py + edit.
+#
+
+# Mediawiki settings
+mw_user = "..."
+mw_user_pw = "..."
+mw_name = "..."
+
+# Mattermost settings
+mm_api_url = "..."
+mm_user = "..."
+mm_user_pw = "..."
+mm_autochannels_team = "..."
+mm_admin_channel = "..."
+mm_footerlist = [
+    "...",
+    ]
diff --git a/mw_vowi/data/.gitkeep b/mw_vowi/data/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/mw_vowi/main.py b/mw_vowi/main.py
new file mode 100755
index 0000000..3b9ca61
--- /dev/null
+++ b/mw_vowi/main.py
@@ -0,0 +1,412 @@
+#!/usr/bin/env python3
+# Someone's Mattermost scripts.
+# Copyright (c) 2016-2020 by Someone (aka. Jan Vales )
+# published under MIT-License
+#
+
+import json
+import os
+import pprint
+import random
+import re
+import signal
+import sys
+import time
+import traceback
+import mwclient
+import mattermost
+
+import config
+
+
+########
+# Util #
+########
+# global regexes
+REXtuwien = re.compile(r'^TU Wien\:(.*) \w\w,?\w?\w? \(.*\).*$', re.IGNORECASE)
+REXdaten = re.compile(r'==\s*Daten\s*==', re.MULTILINE|re.DOTALL|re.IGNORECASE)
+REXwdata_mmchan = re.compile(r'\*??\s*\{\{mattermost-channel\|([a-zA-Z0-9_ -]+)\}\}\s*\n{0,2}', re.IGNORECASE)
+
+def get_smw_properties(subject, sobj_dict=None):
+    """Map each SMW property name in ``subject`` to the list of its data items.
+
+    Data items of type 9 whose ``item`` is a key of ``sobj_dict['sobj']`` are
+    replaced by the value stored there (the resolved sub-object properties).
+    """
+    sobjs = (sobj_dict or {}).get('sobj', {})
+    ret = dict()
+    for prop in subject:
+        if prop['property'] not in ret:
+            ret[prop['property']] = []
+        for dataitem in prop['dataitem']:
+            if dataitem['type'] == 9 and dataitem['item'] in sobjs:
+                ret[prop['property']] += [sobjs[dataitem['item']]]
+            else:
+                ret[prop['property']] += [dataitem['item']]
+    return ret
+
+def query_smw_properties(site, pagename):
+    """Query the ``browsebysubject`` API of ``site`` for ``pagename``.
+
+    Returns the page's properties as built by get_smw_properties(), with
+    sub-objects resolved and the ``_ASK`` property removed.
+    """
+    res = site.api('browsebysubject', subject=pagename)
+
+    # gather all sub objects first
+    ret = {'sobj': {}}
+    if 'sobj' in res['query']:
+        for sobj in res['query']['sobj']:
+            ret['sobj'][sobj['subject']] = get_smw_properties(sobj['data'])
+
+    ret = get_smw_properties(res['query']['data'], ret)
+
+    if '_ASK' in ret:
+        del ret['_ASK']
+    return ret
+
+
+def mw_pagename_to_mm_chan_mapping(pagename):
+    """Derive the Mattermost channel naming data from a "TU Wien:..." page name.
+
+    Returns ``(mmchname, (mmchname, mmchdisplayname, vowiurlname, pagename))``,
+    or ``("", ("", "", "", ""))`` if the page name could not be processed.
+    """
+    try:
+        mmchdisplayname = REXtuwien.search(pagename).group(1)
+        vowiurlname = mmchdisplayname.replace(' ', '_')
+        mmchname = mmchdisplayname.lower().replace('ä', 'ae').replace('ö', 'oe').replace('ü', 'ue')
+        mmchdisplayname = mmchdisplayname[:63]
+
+        mmchname = re.sub(r'[^a-zA-Z0-9_]', r'-', mmchname)
+        mmchname = re.sub(r'-+', r'-', mmchname)
+        mmchname = mmchname[:63]
+        mmchname = mmchname.strip('-')
+
+        # now even group-channels (len == 40) do weird shit. Thanks MM :/
+        # Heisen-Issue: for some reason MM uses sometimes the channel name as id if len == 26 gives and when switching between teams.
+        if len(mmchname) == 54 or len(mmchname) == 40 or len(mmchname) == 26:
+            mmchname = mmchname+'0'
+
+        return (mmchname, (mmchname, mmchdisplayname, vowiurlname, pagename))
+    except Exception:
+        print('Failed to process pagemapping for: '+pagename)
+        return ("", ("", "", "", ""))
+
+#########
+# /Util #
+#########
+
+
+
+def create_update_spam_mm_channel(mws, mma, basepage_name, changes_to_notify_about=None):
+    """Create/update the Mattermost channel of a VoWi LVA page and post changes.
+
+    Creates the channel if it does not exist and rewrites the
+    ``{{mattermost-channel|...}}`` entry in section 1 ("== Daten ==") of the
+    wiki page. For open (type 'O') channels the metadata is patched if the page
+    text changed or ``changes_to_notify_about`` (a dict of change dicts with a
+    ``title``) is non-empty; in the latter case the edited pages are posted.
+
+    Raises Exception if section 1 of the page is not the "== Daten ==" section.
+    """
+    if changes_to_notify_about is None:
+        changes_to_notify_about = dict()
+
+    print('\ncreate_update_spam_mm_channel(): https://vowi.fsinf.at/wiki/'+basepage_name.replace(' ', '_').replace(')', '%29'))
+
+    # really purge cache :/
+    sp = mws.Pages[basepage_name]
+    sp.purge()
+
+    if not sp.exists or sp.redirect:
+        print('Page was deleted or moved. ignoring.')
+        return
+
+    # get smw data of basepage
+    semantic_data = query_smw_properties(mws, sp.name)
+    sp_text_daten = sp_text_daten_orig = sp.text(section=1)
+    if REXdaten.match(sp_text_daten) is None:
+        raise Exception('"== Daten ==" not section=1')
+
+    # abort if outdated ("veraltet")
+    if 'Ist_veraltet' in semantic_data and semantic_data['Ist_veraltet'][0] == 't':
+        print('Page is veraltet. ignoring.')
+        return
+
+    # generate metadata
+    basepage_name_mapping = mw_pagename_to_mm_chan_mapping(basepage_name)
+    metadata = dict()
+    metadata['display_name'] = basepage_name_mapping[1][1]
+    metadata['purpose'] = 'LVA(s) im VoWi: https://vowi.fsinf.at/LVA/'+basepage_name_mapping[1][2]+'?mm'
+    metadata['header'] = 'Links: [LVA(s) im VoWi](https://vowi.fsinf.at/LVA/'+basepage_name_mapping[1][2]+'?mm)'
+
+    if 'Hat_Kurs-ID' in semantic_data:
+        metadata['header'] += ' - [LVA in TISS](https://tiss.tuwien.ac.at/course/courseDetails.xhtml?courseNr='+semantic_data['Hat_Kurs-ID'][0]+')'
+    if 'Hat_Homepage' in semantic_data:
+        metadata['header'] += ' - [LVA-HP]('+semantic_data['Hat_Homepage'][0]+')'
+
+    # create channel, if not exist
+    mmchan = mma.get_channel_by_name(config.mm_autochannels_team, basepage_name_mapping[0])
+    if 'status_code' in mmchan and mmchan['status_code'] == 404:
+        print(mma.create_channel(config.mm_autochannels_team, basepage_name_mapping[0], metadata['display_name'], metadata['purpose'], metadata['header']))
+
+        # really get channel
+        mmchan = mma.get_channel_by_name(config.mm_autochannels_team, basepage_name_mapping[0])
+
+
+    # update wiki:data-section. always.
+    sp_text_daten = re.sub(REXwdata_mmchan, '', sp_text_daten)
+    sp_text_daten = sp_text_daten.strip()
+    sp_text_daten += '\n{{mattermost-channel|'+basepage_name_mapping[0]+'}}'
+    if sp_text_daten != sp_text_daten_orig:
+        print('Updating MW page: '+str(sp.save(sp_text_daten, section=1, summary='mw_vowi')))
+
+
+    # update channel
+    if 'type' in mmchan and mmchan['type'] == 'O' and ((sp_text_daten != sp_text_daten_orig) or (len(changes_to_notify_about) > 0)):
+        mma.patch_channel(mmchan['id'], metadata)
+
+        # spam channel if there is anything to spam about
+        if len(changes_to_notify_about) > 0:
+            msg = '``BOT-AUTODELETE-SLOW``\nFolgende Seiten wurden in den letzten 24 Stunden im VoWi editiert:\n'
+            for v in changes_to_notify_about.values():
+                msg += '\nhttps://vowi.fsinf.at/wiki/'+v['title'].replace(' ', '_')
+            msg += '\n\n``footer``\n'+str(random.choice(config.mm_footerlist))
+            print('Posting to channel:' +str(mma.create_post(mmchan['id'], msg)))
+
+
+
+def process_recent_vowi_changes(mws, mma):
+    """Process the wiki changes since the last run and notify their channels.
+
+    State is kept in ``data/last.json``: the newest seen ``timestamp`` and
+    ``rcid`` plus the changes (grouped by base page) that ``failed`` to be
+    processed, so that they are retried on the next run.
+    """
+    lastdata = dict()
+    try:
+        with open('data/last.json') as fh:
+            lastdata = json.load(fh)
+    except (OSError, ValueError):
+        print('Failed to load last.json')
+
+    changes = dict()
+    if 'failed' in lastdata:
+        changes = lastdata['failed']
+
+    if 'delme' in lastdata:
+        del lastdata['delme']
+
+    if 'timestamp' in lastdata:
+        rc = mws.recentchanges(start=lastdata['timestamp'], dir='newer', namespace=3000, show='!bot|!redirect|!minor', toponly=True)
+    else:
+        rc = mws.recentchanges(dir='newer', namespace=3000, show='!bot|!redirect|!minor', toponly=False)
+        lastdata['timestamp'] = 0
+    lastdata['last_timestamp'] = lastdata['timestamp']
+
+    if 'rcid' not in lastdata:
+        lastdata['rcid'] = 0
+
+    for c in rc:
+        if c['rcid'] == lastdata['rcid']:
+            continue
+
+        basepage = c['title'].split('/', 1)[0]
+        if basepage not in changes:
+            changes[basepage] = dict()
+        changes[basepage][c['title']] = c
+        changes[basepage][c['title']]['timestamp'] = int(time.strftime('%Y%m%d%H%M%S', changes[basepage][c['title']]['timestamp']))
+
+        if changes[basepage][c['title']]['timestamp'] > lastdata['timestamp']:
+            lastdata['timestamp'] = changes[basepage][c['title']]['timestamp']
+        if changes[basepage][c['title']]['rcid'] > lastdata['rcid']:
+            lastdata['rcid'] = changes[basepage][c['title']]['rcid']
+
+    lastdata['failed'] = changes
+
+    # process all pending changes
+    for ch in list(changes):
+        semantic_data = query_smw_properties(mws, ch)
+
+        # never mind, it's outdated anyway ...
+        if 'Ist_veraltet' in semantic_data and semantic_data['Ist_veraltet'][0] == 't':
+            del lastdata['failed'][ch]
+            continue
+
+        try:
+            print("trying: "+ch)
+            create_update_spam_mm_channel(mws, mma, ch, changes[ch])
+            del lastdata['failed'][ch]
+        except Exception:
+            print('Exception, skipping: '+ch+' - '+str(sys.exc_info()))
+            traceback.print_exc()
+
+    with open('data/last.json', 'w') as fh:
+        json.dump(lastdata, fh)
+
+    if len(lastdata['failed']) != 0:
+        print('Some changes failed to get posted!')
+        pprint.pprint(lastdata)
+
+
+
+def process_all_LVAs(mws, mma):
+    """Run create_update_spam_mm_channel() for every not outdated "TU Wien:" page."""
+    for p in mws.ask('[[TU Wien:+]][[Ist veraltet::0]]'):
+        create_update_spam_mm_channel(mws, mma, ''.join(p))
+
+
+
+# remove "has mm-channel" info from outdated lva pages.
+def process_outdated_LVAs(mws):
+    """Remove the mattermost-channel entry from section 1 of every outdated page that has a channel."""
+    for p in mws.ask('[[Hat Mattermost-Channel::+]][[Ist veraltet::1]]'):
+        sp = mws.Pages[(''.join(p.keys()))]
+        print('\nProcessing: https://vowi.fsinf.at/wiki/'+sp.name.replace(' ', '_').replace(')', '%29'))
+
+        sp_text = sp.text(section=1)
+
+        sp_text = re.sub(REXwdata_mmchan, '', sp_text)
+
+        print(sp.save(sp_text, section=1, summary='mw_vowi'))
+        sp.purge()
+
+
+
+def process_outdated_MMChannels(mws, mma, spam_channel=False):
+    """Report team channels that no VoWi page with a Mattermost channel maps to.
+
+    Posts the list of likely outdated channels to ``config.mm_admin_channel``.
+    If ``spam_channel`` is true, every such channel with a known outdated VoWi
+    page also gets its purpose/header patched and a notice posted into it.
+    """
+    # get a dict of all lva-pages that have a mm-channel saved and map them to all the possible channel-names.
+    # Legitimate (not outdated LVAs)
+    mw_page_names_with_mm_channelname_mappings = dict([mw_pagename_to_mm_chan_mapping(c) for c in [''.join(p) for p in mws.ask('[[Hat Mattermost-Channel::+]]')]])
+
+    # All existing mm channels
+    mm_all_channels_with_infos = {mmchan['name']:mmchan for mmchan in mma.get_team_channels(config.mm_autochannels_team)}
+
+    # outdated candidates.
+    diff_channel_names = set(mm_all_channels_with_infos.keys()) - set(mw_page_names_with_mm_channelname_mappings.keys())
+    diff_channel_names.discard('town-square')
+    diff_channel_names.discard('off-topic')
+
+    # get replacements
+    # dict of all outdated LVA-pages.
+    mw_outdated_page_names_mappings = dict([mw_pagename_to_mm_chan_mapping(c) for c in [''.join(p) for p in mws.ask('[[TU Wien:+]][[Kategorie:LVAs]][[Ist veraltet::1]]')]])
+# TODO: seems b0rked.
+    mw_lva_ersetzt_durch = dict()  # stays empty while the lookup below is disabled
+#    for p in mws.ask('[[Ersetzt durch::+]]|?Ersetzt durch'):
+#        for k,v in p.items():
+#            mw_lva_ersetzt_durch[k] = v['printouts']['Ersetzt durch'][0]
+#
+#    mw_lva_wirklich_ersetzt_durch = mw_lva_ersetzt_durch.copy()
+#    pprint.pprint(mw_lva_wirklich_ersetzt_durch)
+#    return
+#    for k,v in mw_lva_ersetzt_durch.items():
+#        ersatz = k
+#        while ersatz in mw_lva_ersetzt_durch:
+#            ersatz = mw_lva_ersetzt_durch[ersatz]
+#        mw_lva_wirklich_ersetzt_durch[k] = ersatz
+#
+
+    # notify admins
+    msg = '``BOT-AUTODELETE-SLOW``\n#### ``Likely outdated MM channels found``\nClick ``Next...`` to load all channels, to make these links work\nCheck via vowi if there is really no not-outdated LVA-page.\n'
+    msg_inconsistent = ''
+    real_diff_channel_names = diff_channel_names.copy()
+    for d in diff_channel_names:
+        try:
+            msg += ' + ~'+d+' :arrow_right: https://vowi.fsinf.at/wiki/Spezial:FlexiblePrefix/'+mw_outdated_page_names_mappings[d][2]+'\n'
+        except KeyError:
+            msg_inconsistent += ' + ~'+d+' :warning: ``No known VoWi URL - SWM-Consistency issue?``\n'
+            real_diff_channel_names.discard(d)
+
+    if len(diff_channel_names) == 0:
+        msg = '``BOT-AUTODELETE-SLOW``\n#### `` No outdated MM channels found`` :)\n'
+
+    if len(msg_inconsistent) > 0:
+        msg += '\n\nThese ('+str(len(diff_channel_names)-len(real_diff_channel_names))+') are likely SMW inconsistencies and were skipped:\n'+msg_inconsistent
+        msg += 'If there are many of these, consider running ``SemanticMediaWiki/maintenance/rebuildData.php``'
+
+    # make real_ the real deal
+    diff_channel_names = real_diff_channel_names
+    del real_diff_channel_names
+
+    print('Posting '+str(len(msg))+' chars to channel:' +str(mma.create_post(config.mm_admin_channel, msg)))
+
+
+    # notify channels
+    if not spam_channel:
+        return
+
+    for mmchan in diff_channel_names:
+        print('notifying: '+mmchan)
+        metadata = dict()
+        try:
+            metadata['purpose'] = 'Outdated? Dieser LVA-Channel wurde als outdated markiert, da alle seine VoWi-LVAs outdated sind. LVA(s) im VoWi: https://vowi.fsinf.at/LVA/'+mw_outdated_page_names_mappings[mmchan][2].replace(' ', '_')+'?mm'
+            metadata['header'] = ':warning: Outdated? :warning: Links: [Outdated LVA(s) im VoWi](https://vowi.fsinf.at/LVA/'+mw_outdated_page_names_mappings[mmchan][2].replace(' ', '_')+'?mm)'
+        except KeyError:
+            metadata['purpose'] = 'Outdated? Dieser LVA-Channel wurde als outdated markiert, da alle seine VoWi-LVAs outdated sind. Sollte dies ein Fehler sein, korrigiere bitte die entsprechende LVA im VoWi.'
+            metadata['header'] = ':warning: Outdated? :warning: Dieser LVA-Channel wurde als outdated markiert, da alle seine VoWi-LVAs outdated sind.'
+
+        # update channel info.
+        print(mma.patch_channel(mm_all_channels_with_infos[mmchan]['id'], metadata))
+
+        # post "something changed" info
+        msg = ('``BOT-AUTODELETE-SLOW``\n### Dieser LVA-Channel wurde als outdated markiert, da alle seine VoWi-LVAs outdated sind.\n'
+            'Sollte dies ein Fehler sein, korrigiere bitte die entsprechende LVA-Seite im VoWi.\n'
+            '``Falls die LVA von einer neuen Person übernommen wurde und sich dabei die Durchführung grundlegend geändert hat, oder dies zu erwarten ist, lege bitte eine neue LVA-Seite an.``\n\n'
+            '## :warning: :warning: :warning: :warning: :warning: :warning: :warning:\n\n'
+        )
+
+        try:
+            msg += ':arrow_right: Die Nachfolge-LVA laut VoWi ist: https://vowi.fsinf.at/LVA/'+mw_lva_ersetzt_durch[mmchan].replace(' ', '_')+' \n'
+        except KeyError:
+            msg += ':warning: Eine Nachfolge-LVA ist nicht bekannt, im VoWi nicht vermerkt, oder konnte nicht ermittelt werden :(\n'
+        try:
+            msg += ':arrow_right: Der Nachfolge-LVA-MM-Channel dürfte sein: ~'+mw_pagename_to_mm_chan_mapping(mw_lva_ersetzt_durch[mmchan])[0]
+        except KeyError:
+            msg += ':warning: Ein Nachfolge-LVA-MM-Channel konnte nicht ermittelt werden :('
+
+        print('Posting to channel:' +str(mma.create_post(mm_all_channels_with_infos[mmchan]['id'], msg)))
+
+
+
+if __name__ == '__main__':
+    def signal_handler(signum, frame):
+        print('SIG received. exitting!')
+        os._exit(0)
+    signal.signal(signal.SIGINT, signal_handler)
+
+    mws = mwclient.Site(config.mw_name, path='/', retry_timeout=120)
+    mws.login(config.mw_user, config.mw_user_pw)
+    mma = mattermost.MMApi(config.mm_api_url)
+    mma.login(config.mm_user, config.mm_user_pw)
+
+    # Use recent changes to create, update and spam channels.
+    if len(sys.argv) == 1:
+        process_recent_vowi_changes(mws, mma)
+
+    # maint stuff
+    # "full" run modes - We can't run the whole script at once, due to our db-backup, likely killing our run mid-execution. :(
+    # "new" should actually not really be needed, as it should be covered by incremental changes.
+    if len(sys.argv) > 1 and sys.argv[1] == 'all-pages':
+        print(sys.argv[1])
+        process_all_LVAs(mws, mma)
+
+    if len(sys.argv) > 1 and sys.argv[1] == 'outdated-pages':
+        print(sys.argv[1])
+        process_outdated_LVAs(mws)
+
+    if len(sys.argv) > 1 and sys.argv[1] == 'outdated-chans':
+        print(sys.argv[1])
+        process_outdated_MMChannels(mws, mma, False)
+
+    if len(sys.argv) > 1 and sys.argv[1] == 'outdated-chans-spam':
+        print(sys.argv[1])
+        process_outdated_MMChannels(mws, mma, True)
+
+    # logout
+    mma.revoke_user_session()
-- 
2.43.0