1 /++
2     The Webtitles plugin catches URLs pasted in a channel, follows them and
3     reports back the title of the web page that was linked to.
4 
5     It has no bot commands; everything is done by automatically scanning channel
6     and private query messages for things that look like links.
7 
8     See_Also:
9         https://github.com/zorael/kameloso/wiki/Current-plugins#webtitles,
10         [kameloso.plugins.common.core],
11         [kameloso.plugins.common.misc]
12 
13     Copyright: [JR](https://github.com/zorael)
14     License: [Boost Software License 1.0](https://www.boost.org/users/license.html)
15 
16     Authors:
17         [JR](https://github.com/zorael)
18  +/
19 module kameloso.plugins.webtitles;
20 
21 version(WithWebtitlesPlugin):
22 
23 private:
24 
25 import kameloso.plugins;
26 import kameloso.plugins.common.core;
27 import kameloso.plugins.common.awareness : MinimalAuthentication;
28 import kameloso.messaging;
29 import dialect.defs;
30 import std.json : JSONValue;
31 import std.typecons : Flag, No, Yes;
32 
33 
// descriptionExemptions
/++
    Hostnames explicitly exempt from having their meta descriptions included
    after the titles (used when the `descriptions` setting is enabled).

    Must be in lowercase, as looked-up hostnames are lowercased before they
    are compared against this list.
 +/
static immutable descriptionExemptions =
[
    "imgur.com",
];
44 
45 
// WebtitlesSettings
/++
    All Webtitles plugin settings, gathered in a struct.
 +/
@Settings struct WebtitlesSettings
{
    /// Toggles whether or not the plugin should react to events at all.
    @Enabler bool enabled = true;

    /// Toggles whether or not meta descriptions should be reported next to titles.
    bool descriptions = true;
}
58 
59 
// TitleLookupResults
/++
    A record of a URL lookup.

    This is both used to aggregate information about the lookup, as well as to
    add hysteresis to lookups, so we don't look the same one up over and over
    if they were pasted over and over.
 +/
struct TitleLookupResults
{
    /// Looked up web page title.
    string title;

    /// The content of the web page's `description` meta tag, if any.
    string description;

    /// Domain name of the looked up URL (scheme and leading "www." stripped).
    string domain;

    /// YouTube video title, if the link was a YouTube one; otherwise empty.
    string youtubeTitle;

    /// YouTube video author, if the link was a YouTube one; otherwise empty.
    string youtubeAuthor;

    /// The UNIX timestamp of when the title was looked up, used to expire cache entries.
    long when;
}
88 
89 
// TitleLookupRequest
/++
    A record of a URL lookup request.

    This is used to aggregate information about a lookup request, making it
    easier to pass it in between functions. It serves no greater purpose.
 +/
struct TitleLookupRequest
{
    /// The context state of the requesting plugin instance.
    IRCPluginState state;

    /// The [dialect.defs.IRCEvent|IRCEvent] that instigated the lookup.
    IRCEvent event;

    /// URL to look up.
    string url;

    /// Results of the title lookup, filled in as the lookup progresses.
    TitleLookupResults results;
}
111 
112 
// onMessage
/++
    Parses a message to see if the message contains one or more URLs.

    It uses a simple state machine in [kameloso.common.findURLs|findURLs] to
    exhaustively try to look up every URL returned by it.

    Messages that begin with the bot's command prefix are ignored.
 +/
@(IRCEventHandler()
    .onEvent(IRCEvent.Type.CHAN)
    .permissionsRequired(Permissions.ignore)
    .channelPolicy(ChannelPolicy.home)
)
void onMessage(WebtitlesPlugin plugin, const ref IRCEvent event)
{
    import kameloso.common : findURLs;
    import lu.string : beginsWith, strippedLeft;

    immutable stripped_ = event.content.strippedLeft;

    // Don't trigger on what looks like a bot command
    if (stripped_.beginsWith(plugin.state.settings.prefix)) return;

    auto urls = findURLs(event.content);  // mutable so nom works

    if (urls.length)
    {
        lookupURLs(plugin, event, urls);
    }
}
137 
138 
// lookupURLs
/++
    Looks up the URLs in the passed `string[]` `urls` by spawning a worker
    thread to do all the work.

    It accesses the cache of already looked up addresses to speed things up.

    Params:
        plugin = The current [WebtitlesPlugin].
        event = The [dialect.defs.IRCEvent|IRCEvent] that instigated the lookups.
        urls = Array of URL strings to look up.
 +/
void lookupURLs(WebtitlesPlugin plugin, const /*ref*/ IRCEvent event, string[] urls)
{
    import kameloso.common : logger;
    import lu.string : beginsWith, nom;
    import std.concurrency : spawn;

    // Used to deduplicate URLs pasted multiple times in the same message
    bool[string] uniques;

    foreach (immutable i, url; urls)
    {
        // If the URL contains an octothorpe fragment identifier, like
        // https://www.google.com/index.html#this%20bit
        // then strip that.
        url = url.nom!(Yes.inherit, Yes.decode)('#');

        // Strip trailing slashes. Guard on length so pathological input
        // can't reduce the string to nothing and make url[$-1] throw.
        while (url.length && (url[$-1] == '/'))
        {
            url = url[0..$-1];
        }

        if (!url.length) continue;  // Nothing left to look up

        if (url in uniques) continue;

        uniques[url] = true;

        enum pattern = "Caught URL: <l>%s";
        logger.infof(pattern, url);

        TitleLookupRequest request;
        request.state = plugin.state;
        request.event = event;
        request.url = url;

        // Evict stale cache entries before consulting the cache
        if (plugin.cache.length) prune(plugin.cache, plugin.expireSeconds);

        TitleLookupResults cachedResult;

        synchronized //()
        {
            if (const cachedResultPointer = url in plugin.cache)
            {
                cachedResult = *cachedResultPointer;
            }
        }

        if (cachedResult != TitleLookupResults.init)
        {
            // Cache hit; report immediately without spawning a worker
            logger.log("Found cached lookup.");
            request.results = cachedResult;

            if (request.results.youtubeTitle.length)
            {
                reportYouTubeTitle(request);
            }
            else
            {
                reportTitle(request);
            }
            continue;
        }

        // Stagger parallel lookups by index so they don't all burst at once
        cast(void)spawn(&worker, cast(shared)request, plugin.cache,
            (i * plugin.delayMsecs),
            cast(Flag!"descriptions")plugin.webtitlesSettings.descriptions,
            plugin.state.connSettings.caBundleFile);
    }

    import kameloso.thread : ThreadMessage;
    import std.concurrency : prioritySend;

    plugin.state.mainThread.prioritySend(ThreadMessage.shortenReceiveTimeout());
}
217 
218 
// worker
/++
    Looks up and reports the title of a URL.

    Additionally reports YouTube titles and authors if settings say to do such.

    Worker to be run in its own thread; spawned by [lookupURLs].

    Params:
        sRequest = Shared [TitleLookupRequest] aggregate with all the state and
            context needed to look up a URL and report the results to the local terminal.
        cache = Shared cache of previous [TitleLookupRequest]s, written to on success.
        delayMsecs = Milliseconds to delay before doing the lookup, to allow for
            parallel lookups without bursting all of them at once.
        descriptions = Whether or not to look up meta descriptions.
        caBundleFile = Path to a `cacert.pem` SSL certificate bundle.
 +/
void worker(
    shared TitleLookupRequest sRequest,
    shared TitleLookupResults[string] cache,
    const ulong delayMsecs,
    const Flag!"descriptions" descriptions,
    const string caBundleFile)
{
    import lu.string : beginsWith, contains, nom;
    import std.datetime.systime : Clock;
    import std.format : format;
    import std.typecons : No, Yes;
    static import kameloso.common;

    version(Posix)
    {
        import kameloso.thread : setThreadName;
        setThreadName("webtitles");
    }

    // Set the global settings so messaging functions don't segfault us
    kameloso.common.settings = &(cast()sRequest).state.settings;

    if (delayMsecs > 0)
    {
        import core.thread : Thread;
        import core.time : msecs;
        Thread.sleep(delayMsecs.msecs);
    }

    // Unshare the request for local use; timestamp is used for cache expiry
    TitleLookupRequest request = cast()sRequest;
    immutable now = Clock.currTime.toUnixTime;

    if (request.url.contains("://i.imgur.com/"))
    {
        // imgur direct links naturally have no titles, but the normal pages do.
        // Rewrite and look those up instead.
        request.url = rewriteDirectImgurURL(request.url);
    }
    else if (request.url.contains("youtube.com/watch?v=") ||
        request.url.contains("youtu.be/"))
    {
        // Do our own slicing instead of using regexes, because footprint.
        string slice = request.url;

        slice.nom!(Yes.decode)("http");
        if (slice[0] == 's') slice = slice[1..$];
        slice = slice[3..$];  // ://

        if (slice.beginsWith("www.")) slice = slice[4..$];

        if (slice.beginsWith("youtube.com/watch?v=") ||
            slice.beginsWith("youtu.be/"))
        {
            import std.json : JSONException;

            try
            {
                immutable info = getYouTubeInfo(request.url, caBundleFile);

                // Let's assume all YouTube clips have titles and authors
                // Should we decode the author too?
                request.results.youtubeTitle = decodeEntities(info["title"].str);
                request.results.youtubeAuthor = info["author_name"].str;

                reportYouTubeTitle(request);

                // Cache the successful result before returning
                request.results.when = now;

                synchronized //()
                {
                    cache[request.url] = cast(shared)request.results;
                }
                return;
            }
            catch (TitleFetchException e)
            {
                // Code 2 means the fetch itself could not even be attempted
                if (e.code == 2)
                {
                    import kameloso.constants : MagicErrorStrings;

                    if (e.msg == MagicErrorStrings.sslLibraryNotFound)
                    {
                        enum wikiPattern = cast(string)MagicErrorStrings.visitWikiOneliner;
                        enum pattern = "Error fetching YouTube title: <l>" ~
                            MagicErrorStrings.sslLibraryNotFoundRewritten ~
                            "</> <t>(is OpenSSL installed?)";
                        request.state.askToError(pattern);
                        request.state.askToError(wikiPattern);

                        version(Windows)
                        {
                            enum getoptMessage = cast(string)MagicErrorStrings.getOpenSSLSuggestion;
                            request.state.askToError(getoptMessage);
                        }
                    }
                    else
                    {
                        enum pattern = "Error fetching YouTube title: <l>%s";
                        request.state.askToError(pattern.format(e.msg));
                    }
                    return;
                }
                else if (e.code >= 400)
                {
                    // Simply failed to fetch
                    enum pattern = "Webtitles YouTube worker saw HTTP <l>%d</>: <t>%s";
                    request.state.askToError(pattern.format(e.code, e.msg));
                }
                else
                {
                    request.state.askToError("Error fetching YouTube video information: <l>" ~ e.msg);
                    //version(PrintStacktraces) request.state.askToTrace(e.info);
                    // Drop down
                }
            }
            catch (JSONException e)
            {
                request.state.askToError("Failed to parse YouTube video information: <l>" ~ e.msg);
                //version(PrintStacktraces) request.state.askToTrace(e.info);
                // Drop down
            }
            catch (Exception e)
            {
                request.state.askToError("Unexpected exception fetching YouTube video information: <l>" ~ e.msg);
                version(PrintStacktraces) request.state.askToTrace(e.toString);
                // Drop down
            }
        }
        else
        {
            // Unsure what this is really. Drop down and treat like normal link
        }
    }

    // Normal web page lookup; tried twice with a rewritten URL (trailing
    // slash toggled) if the first attempt fails with a fetchable error.
    void tryLookup()
    {
        import std.format : format;
        import std.range : only;
        import core.exception : UnicodeException;

        foreach (immutable firstTime; only(true, false))
        {
            try
            {
                request.results = lookupTitle(request.url, descriptions, caBundleFile);
                reportTitle(request);
                request.results.when = now;

                synchronized //()
                {
                    cache[request.url] = cast(shared)request.results;
                }
            }
            catch (TitleFetchException e)
            {
                // Code 2 means the fetch itself could not even be attempted
                if (e.code == 2)
                {
                    import kameloso.constants : MagicErrorStrings;

                    if (e.msg == MagicErrorStrings.sslLibraryNotFound)
                    {
                        enum wikiPattern = cast(string)MagicErrorStrings.visitWikiOneliner;
                        enum pattern = "Error fetching webpage title: <l>" ~
                            MagicErrorStrings.sslLibraryNotFoundRewritten ~
                            "</> <t>(is OpenSSL installed?)";
                        request.state.askToError(pattern);
                        request.state.askToError(wikiPattern);

                        version(Windows)
                        {
                            enum getoptMessage = cast(string)MagicErrorStrings.getOpenSSLSuggestion;
                            request.state.askToError(getoptMessage);
                        }
                    }
                    else
                    {
                        enum pattern = "Error fetching webpage title: <l>%s";
                        request.state.askToError(pattern.format(e.msg));
                    }
                    return;
                }
                else if (e.code >= 400)
                {
                    // Simply failed to fetch
                    enum pattern = "Webtitles worker saw HTTP <l>%d</>: <l>%s";
                    request.state.askToWarn(pattern.format(e.code, e.msg));
                }
                else
                {
                    // No title tag found
                    enum pattern = "No title tag found: <l>%s";
                    request.state.askToWarn(pattern.format(e.msg));
                }

                if (firstTime)
                {
                    request.state.askToLog("Rewriting URL and retrying...");

                    // Toggle the trailing slash and loop around for a second try
                    if (request.url[$-1] == '/')
                    {
                        request.url = request.url[0..$-1];
                    }
                    else
                    {
                        request.url ~= '/';
                    }
                    continue;
                }
            }
            catch (UnicodeException e)
            {
                enum pattern = "Webtitles worker Unicode exception: <l>%s</> " ~
                    "(link is probably to an image or similar)";
                request.state.askToError(pattern.format(e.msg));
                //version(PrintStacktraces) request.state.askToTrace(e.info);
            }
            catch (Exception e)
            {
                request.state.askToWarn("Webtitles saw unexpected exception: <l>" ~ e.msg);
                version(PrintStacktraces) request.state.askToTrace(e.toString);
            }

            // Dropped down; end foreach by returning
            return;
        }
    }

    tryLookup();
}
465 
466 
// lookupTitle
/++
    Given a URL, tries to look up the web page title of it.

    Redirects (HTTP 301, 302, 303, 307, 308) are followed, up to five hops.

    Params:
        url = URL string to look up.
        descriptions = Whether or not to look up meta descriptions.
        caBundleFile = Path to a `cacert.pem` SSL certificate bundle.

    Returns:
        A finished [TitleLookupResults].

    Throws:
        [TitleFetchException] if URL could not be fetched, or if no title
        could be divined from it.
 +/
auto lookupTitle(
    const string url,
    const Flag!"descriptions" descriptions,
    const string caBundleFile)
{
    import kameloso.constants : KamelosoInfo, Timeout;
    import lu.string : beginsWith, nom;
    import arsd.dom : Document;
    import arsd.http2 : HttpClient, Uri;
    import std.algorithm.comparison : among;
    import std.uni : toLower;
    import core.time : seconds;

    // No need to keep a static HttpClient since this will be in a new thread every time
    auto client = new HttpClient;
    client.useHttp11 = true;
    client.keepAlive = false;
    client.acceptGzip = false;
    client.defaultTimeout = Timeout.httpGET.seconds;  // FIXME
    client.userAgent = "kameloso/" ~ cast(string)KamelosoInfo.version_;
    if (caBundleFile.length) client.setClientCertificate(caBundleFile, caBundleFile);

    auto req = client.request(Uri(url));
    auto res = req.waitForCompletion();

    if (res.code.among!(301, 302, 303, 307, 308))
    {
        // Moved; follow redirects, capped at five hops
        foreach (immutable i; 0..5)
        {
            // A redirect without a Location header cannot be followed
            if (!res.location.length) break;
            req = client.request(Uri(res.location));
            res = req.waitForCompletion();
            if (!res.code.among!(301, 302, 303, 307, 308)) break;
        }
    }

    if ((res.code == 2) || (res.code >= 400) || !res.contentText.length)
    {
        // res.codeText among Bad Request, probably Not Found, ...
        throw new TitleFetchException(
            res.codeText,
            url,
            res.code,
            __FILE__,
            __LINE__);
    }

    auto doc = new Document;
    doc.parseGarbage("");  // Work around missing null check, causing segfaults on empty pages
    doc.parseGarbage(res.responseText);

    if (!doc.title.length)
    {
        enum message = "No title tag found";
        throw new TitleFetchException(
            message,
            url,
            res.code,
            __FILE__,
            __LINE__);
    }

    // Derive the display domain: drop the scheme and any leading "www."
    string slice = url;  // mutable
    slice.nom("//");
    string host = slice.nom!(Yes.inherit)('/').toLower;
    if (host.beginsWith("www.")) host = host[4..$];

    TitleLookupResults results;
    results.title = decodeEntities(doc.title);
    results.domain = host;

    if (descriptions)
    {
        import std.algorithm.searching : canFind;

        if (!descriptionExemptions.canFind(host))
        {
            auto metaTags = doc.getElementsByTagName("meta");

            foreach (tag; metaTags)
            {
                if (tag.name == "description")
                {
                    results.description = decodeEntities(tag.content);
                    break;
                }
            }
        }
    }

    return results;
}
576 
577 
// reportTitle
/++
    Echoes the result of a web title lookup to a channel.

    Long lines are truncated to fit within the IRC protocol's line limit, with
    care taken not to cut a multibyte UTF-8 sequence in half.

    Params:
        request = A [TitleLookupRequest] containing the results of the lookup.
 +/
void reportTitle(TitleLookupRequest request)
{
    string line;

    if (request.results.domain.length)
    {
        import std.format : format;

        immutable maybePipe = request.results.description.length ? " | " : string.init;
        enum pattern = "[<b>%s<b>] %s%s%s";
        line = pattern.format(
            request.results.domain,
            request.results.title,
            maybePipe,
            request.results.description);
    }
    else
    {
        line = request.results.title;
    }

    // "PRIVMSG #12345678901234567890123456789012345678901234567890 :".length == 61
    enum maxLen = (510 - 61);

    if (line.length > maxLen)
    {
        enum ellipsis = " [...]";  // " [...]".length == 6
        size_t cutoff = (maxLen - ellipsis.length);

        // Slicing by byte index can land inside a multibyte UTF-8 sequence;
        // back off past continuation bytes (0b10xxxxxx) to a code point start.
        while ((cutoff > 0) && ((line[cutoff] & 0xC0) == 0x80))
        {
            --cutoff;
        }

        line = line[0..cutoff] ~ ellipsis;
    }

    chan(request.state, request.event.channel, line);
}
617 
618 
// reportYouTubeTitle
/++
    Echoes the result of a YouTube lookup to a channel.

    Params:
        request = A [TitleLookupRequest] containing the results of the lookup.
 +/
void reportYouTubeTitle(TitleLookupRequest request)
{
    import std.format : format;

    enum pattern = "[<b>youtube.com<b>] %s (uploaded by <h>%s<h>)";
    immutable title = request.results.youtubeTitle;
    immutable author = request.results.youtubeAuthor;
    immutable message = format(pattern, title, author);
    chan(request.state, request.event.channel, message);
}
634 
635 
// rewriteDirectImgurURL
/++
    Takes a direct imgur link (one that points to an image) and rewrites it to
    instead point to the image's page.

    Images (`jpg`, `png`, ...) can naturally not have titles, but the normal pages can.

    Params:
        url = String link to rewrite.

    Returns:
        A rewritten string if it's a compatible imgur one, else the passed `url`.
 +/
auto rewriteDirectImgurURL(const string url) @safe pure
{
    import lu.string : beginsWith, nom;
    import std.typecons : Yes;

    enum httpsPrefix = "https://i.imgur.com/";
    enum httpPrefix = "http://i.imgur.com/";

    if (url.beginsWith(httpsPrefix))
    {
        // Keep everything between the prefix and the file extension
        immutable slug = url[httpsPrefix.length..$].nom!(Yes.decode)('.');
        return "https://imgur.com/" ~ slug;
    }
    else if (url.beginsWith(httpPrefix))
    {
        immutable slug = url[httpPrefix.length..$].nom!(Yes.decode)('.');
        return "https://imgur.com/" ~ slug;
    }

    return url;
}
667 
/// Unit tests for [rewriteDirectImgurURL].
unittest
{
    {
        // https direct image link rewrites to the image's page
        immutable directURL = "https://i.imgur.com/URHe5og.jpg";
        immutable rewritten = rewriteDirectImgurURL(directURL);
        assert((rewritten == "https://imgur.com/URHe5og"), rewritten);
    }
    {
        // http direct image link likewise rewrites, upgraded to https
        immutable directURL = "http://i.imgur.com/URHe5og.jpg";
        immutable rewritten = rewriteDirectImgurURL(directURL);
        assert((rewritten == "https://imgur.com/URHe5og"), rewritten);
    }
}
682 
683 
// getYouTubeInfo
/++
    Fetches the JSON description of a YouTube video link, allowing us to report
    the page's title without having to actually fetch the video page.

    Queries YouTube's oEmbed endpoint, with the video URL percent-encoded so
    its own query string survives embedding as a query parameter.

    Example:
    ---
    auto info = getYouTubeInfo("https://www.youtube.com/watch?v=s-mOy8VUEBk");
    writeln(info["title"].str);
    writeln(info["author_name"].str);
    ---

    Params:
        url = A YouTube video link string.
        caBundleFile = Path to a `cacert.pem` SSL certificate bundle.

    Returns:
        A [std.json.JSONValue|JSONValue] with fields describing the looked-up video.

    Throws:
        [TitleFetchException] if the YouTube ID was invalid and could not be queried.

        [std.json.JSONException|JSONException] if the JSON response could not be parsed.
 +/
auto getYouTubeInfo(const string url, const string caBundleFile)
{
    import kameloso.constants : KamelosoInfo, Timeout;
    import arsd.http2 : HttpClient, Uri;
    import std.algorithm.comparison : among;
    import std.json : parseJSON;
    import std.uri : encodeComponent;
    import core.time : seconds;

    // No need to keep a static HttpClient since this will be in a new thread every time
    auto client = new HttpClient;
    client.useHttp11 = true;
    client.keepAlive = false;
    client.acceptGzip = false;
    client.defaultTimeout = Timeout.httpGET.seconds;  // FIXME
    client.userAgent = "kameloso/" ~ cast(string)KamelosoInfo.version_;
    if (caBundleFile.length) client.setClientCertificate(caBundleFile, caBundleFile);

    // Percent-encode the video URL; its own '?' and '&' must not be taken
    // as delimiters of the oEmbed query string.
    immutable youtubeURL = "https://www.youtube.com/oembed?format=json&url=" ~ encodeComponent(url);

    auto req = client.request(Uri(youtubeURL));
    auto res = req.waitForCompletion();

    if (res.code.among!(301, 302, 303, 307, 308))
    {
        // Moved; follow redirects, capped at five hops
        foreach (immutable i; 0..5)
        {
            // A redirect without a Location header cannot be followed
            if (!res.location.length) break;
            req = client.request(Uri(res.location));
            res = req.waitForCompletion();
            if (!res.code.among!(301, 302, 303, 307, 308)) break;
        }
    }

    if ((res.code == 2) || (res.code >= 400) || !res.contentText.length)
    {
        // res.codeText among Bad Request, probably Not Found, ...
        throw new TitleFetchException(res.codeText, url, res.code, __FILE__, __LINE__);
    }

    return parseJSON(res.contentText);
}
751 
752 
// TitleFetchException
/++
    A normal [object.Exception|Exception] but with an HTTP status code (and
    optionally the offending URL) attached.
 +/
final class TitleFetchException : Exception
{
@safe:
    /// The URL that was attempted to fetch the title of.
    string url;

    /// The HTTP status code that was returned when attempting to fetch a title.
    uint code;

    /++
        Create a new [TitleFetchException], attaching a URL and an HTTP status code.
     +/
    this(
        const string message,
        const string url,
        const uint code,
        const string file = __FILE__,
        const size_t line = __LINE__,
        Throwable nextInChain = null) pure nothrow @nogc @safe
    {
        this.url = url;  // Was previously never assigned, always leaving the member empty
        this.code = code;
        super(message, file, line, nextInChain);
    }

    /++
        Create a new [TitleFetchException], without attaching anything.
     +/
    this(
        const string message,
        const string file = __FILE__,
        const size_t line = __LINE__,
        Throwable nextInChain = null) pure nothrow @nogc @safe
    {
        super(message, file, line, nextInChain);
    }
}
793 
794 
// decodeEntities
/++
    Removes unwanted characters from a string, and decodes HTML entities in it
    (like `&mdash;` and `&nbsp;`).

    Carriage returns are removed outright, newlines are folded into spaces,
    and the result is stripped of surrounding whitespace before decoding.

    Params:
        line = Line to remove unwanted characters from and decode entities in.

    Returns:
        A modified string, with unwanted bits stripped out and/or decoded.
 +/
auto decodeEntities(const string line)
{
    import lu.string : stripped;
    import arsd.dom : htmlEntitiesDecode;
    import std.array : replace;

    return line
        .replace("\r", string.init)
        .replace('\n', ' ')
        .stripped
        .htmlEntitiesDecode();
}
818 
/// Unit tests for [decodeEntities].
unittest
{
    immutable t1 = "&quot;Hello&nbsp;world!&quot;";
    immutable t1p = decodeEntities(t1);
    assert((t1p == "\"Hello\u00A0world!\""), t1p);  // not a normal space

    immutable t2 = "&lt;/title&gt;";
    immutable t2p = decodeEntities(t2);
    assert((t2p == "</title>"), t2p);

    immutable t3 = "&mdash;&micro;&acute;&yen;&euro;";
    immutable t3p = decodeEntities(t3);
    assert((t3p == "—µ´¥€"), t3p);  // not a normal dash

    immutable t4 = "&quot;Se&ntilde;or &THORN;&quot; &copy;2017";
    immutable t4p = decodeEntities(t4);
    assert((t4p == `"Señor Þ" ©2017`), t4p);

    // Surrounding whitespace and newlines are stripped away
    immutable t5 = "\n        Nyheter - NSD.se        \n";
    immutable t5p = decodeEntities(t5);
    assert(t5p == "Nyheter - NSD.se");
}
842 
843 
// prune
/++
    Garbage-collects old entries in a `TitleLookupResults[string]` lookup cache.

    Params:
        cache = Cache of previous [TitleLookupResults], `shared` so that it can
            be reused in further lookup (other threads).
        expireSeconds = After how many seconds a cached entry is considered to
            have expired and should no longer be used as a valid entry.
 +/
void prune(shared TitleLookupResults[string] cache, const uint expireSeconds)
{
    import lu.objmanip : pruneAA;
    import std.datetime.systime : Clock;

    // Nothing cached; nothing to do
    if (cache.length == 0) return;

    immutable nowInUnix = Clock.currTime.toUnixTime;

    synchronized //()
    {
        pruneAA!((entry) => ((nowInUnix - entry.when) > expireSeconds))(cache);
    }
}
868 
869 
// initialise
/++
    Initialises the shared cache, else it won't retain changes.

    Assigning a dummy entry and immediately removing it forces the associative
    array to allocate its backing storage.
 +/
void initialise(WebtitlesPlugin plugin)
{
    // No need to synchronise this; no worker threads are running
    enum dummyKey = string.init;
    plugin.cache[dummyKey] = TitleLookupResults.init;
    plugin.cache.remove(dummyKey);
}
882 
883 
// Mixes in minimal authentication awareness, used for permissions checks.
mixin MinimalAuthentication;

// Registers this plugin so the bot's plugin machinery picks it up.
mixin PluginRegistration!WebtitlesPlugin;
886 
887 public:
888 
889 
// WebtitlesPlugin
/++
    The Webtitles plugin catches HTTP URL links in messages, connects to
    their servers and and streams the web page itself, looking for the web page's
    title. This is then reported to the originating channel or personal query.
 +/
final class WebtitlesPlugin : IRCPlugin
{
private:
    /// All Webtitles options gathered.
    WebtitlesSettings webtitlesSettings;

    /// Cache of recently looked-up web titles, shared between worker threads.
    shared TitleLookupResults[string] cache;

    /++
        How long (in seconds) before a cached title lookup expires and its
        address has to be looked up anew.
     +/
    enum expireSeconds = 600;

    /// In the case of chained URL lookups, how many milliseconds to delay each lookup by.
    enum delayMsecs = 100;

    /++
        How big a buffer to initially allocate when downloading web pages to get
        their titles.
     +/
    enum lookupBufferSize = 8192;

    mixin IRCPluginImpl;
}