/++
    The Webtitles plugin catches URLs pasted in a channel, follows them and
    reports back the title of the web page that was linked to.

    It has no bot commands; everything is done by automatically scanning channel
    and private query messages for things that look like links.

    See_Also:
        https://github.com/zorael/kameloso/wiki/Current-plugins#webtitles,
        [kameloso.plugins.common.core],
        [kameloso.plugins.common.misc]

    Copyright: [JR](https://github.com/zorael)
    License: [Boost Software License 1.0](https://www.boost.org/users/license.html)

    Authors:
        [JR](https://github.com/zorael)
 +/
module kameloso.plugins.webtitles;

version(WithWebtitlesPlugin):

private:

import kameloso.plugins;
import kameloso.plugins.common.core;
import kameloso.plugins.common.awareness : MinimalAuthentication;
import kameloso.messaging;
import dialect.defs;
import std.json : JSONValue;
import std.typecons : Flag, No, Yes;


// descriptionExemptions
/++
    Hostnames explicitly exempt from having their descriptions included after the titles.

    Must be in lowercase. Compared against the looked-up host with any leading
    "www." already stripped (see `lookupTitle`).
 +/
static immutable descriptionExemptions =
[
    "imgur.com",
];


// WebtitlesSettings
/++
    All Webtitles settings, gathered in a struct.
 +/
@Settings struct WebtitlesSettings
{
    /// Toggles whether or not the plugin should react to events at all.
    @Enabler bool enabled = true;

    /// Toggles whether or not meta descriptions should be reported next to titles.
    bool descriptions = true;
}


// TitleLookupResults
/++
    A record of a URL lookup.

    This is both used to aggregate information about the lookup, as well as to
    add hysteresis to lookups, so we don't look the same one up over and over
    if they were pasted over and over.
 +/
struct TitleLookupResults
{
    /// Looked up web page title.
    string title;

    /// The content of the web page's `description` tag.
    string description;

    /// Domain name of the looked up URL.
    string domain;

    /// YouTube video title, if such a YouTube link.
    string youtubeTitle;

    /// YouTube video author, if such a YouTube link.
    string youtubeAuthor;

    /// The UNIX timestamp of when the title was looked up. Used by `prune` to expire entries.
    long when;
}


// TitleLookupRequest
/++
    A record of a URL lookup request.

    This is used to aggregate information about a lookup request, making it
    easier to pass it in between functions. It serves no greater purpose.
 +/
struct TitleLookupRequest
{
    /// The context state of the requesting plugin instance.
    IRCPluginState state;

    /// The [dialect.defs.IRCEvent|IRCEvent] that instigated the lookup.
    IRCEvent event;

    /// URL to look up.
    string url;

    /// Results of the title lookup.
    TitleLookupResults results;
}


// onMessage
/++
    Parses a message to see if the message contains one or more URLs.

    Messages that start with the bot's command prefix are ignored, so links
    inside bot commands are not followed.

    It uses a simple state machine in [kameloso.common.findURLs|findURLs] to
    exhaustively try to look up every URL returned by it.
 +/
@(IRCEventHandler()
    .onEvent(IRCEvent.Type.CHAN)
    .permissionsRequired(Permissions.ignore)
    .channelPolicy(ChannelPolicy.home)
)
void onMessage(WebtitlesPlugin plugin, const ref IRCEvent event)
{
    import kameloso.common : findURLs;
    import lu.string : beginsWith, strippedLeft;

    // Don't follow links that are part of a prefixed bot command
    if (event.content.strippedLeft.beginsWith(plugin.state.settings.prefix)) return;

    string[] urls = findURLs(event.content);  // mutable so nom works
    if (!urls.length) return;

    return lookupURLs(plugin, event, urls);
}


// lookupURLs
/++
    Looks up the URLs in the passed `string[]` `urls` by spawning a worker
    thread to do all the work.

    It accesses the cache of already looked up addresses to speed things up.
 +/
void lookupURLs(WebtitlesPlugin plugin, const /*ref*/ IRCEvent event, string[] urls)
{
    import kameloso.common : logger;
    import lu.string : beginsWith, nom;
    import std.concurrency : spawn;

    // Deduplicates URLs within this one message
    bool[string] uniques;

    foreach (immutable i, url; urls)
    {
        // If the URL contains an octothorpe fragment identifier, like
        // https://www.google.com/index.html#this%20bit
        // then strip that.
        url = url.nom!(Yes.inherit, Yes.decode)('#');

        // Strip trailing slashes so "example.com/" and "example.com" cache alike.
        // NOTE(review): assumes url is non-empty here; presumably findURLs never
        // returns an empty or fragment-only string, else url[$-1] would be a
        // range error -- TODO confirm
        while (url[$-1] == '/')
        {
            url = url[0..$-1];
        }

        if (url in uniques) continue;

        uniques[url] = true;

        enum pattern = "Caught URL: <l>%s";
        logger.infof(pattern, url);

        TitleLookupRequest request;
        request.state = plugin.state;
        request.event = event;
        request.url = url;

        // Expire stale cache entries before consulting the cache
        if (plugin.cache.length) prune(plugin.cache, plugin.expireSeconds);

        TitleLookupResults cachedResult;

        // Guard against concurrent access from worker threads
        synchronized //()
        {
            if (const cachedResultPointer = url in plugin.cache)
            {
                cachedResult = *cachedResultPointer;
            }
        }

        if (cachedResult != TitleLookupResults.init)
        {
            logger.log("Found cached lookup.");
            request.results = cachedResult;

            if (request.results.youtubeTitle.length)
            {
                reportYouTubeTitle(request);
            }
            else
            {
                reportTitle(request);
            }
            continue;
        }

        // Stagger chained lookups by (i * delayMsecs) so they don't all burst at once
        cast(void)spawn(&worker, cast(shared)request, plugin.cache,
            (i * plugin.delayMsecs),
            cast(Flag!"descriptions")plugin.webtitlesSettings.descriptions,
            plugin.state.connSettings.caBundleFile);
    }

    import kameloso.thread : ThreadMessage;
    import std.concurrency : prioritySend;

    plugin.state.mainThread.prioritySend(ThreadMessage.shortenReceiveTimeout());
}


// worker
/++
    Looks up and reports the title of a URL.

    Additionally reports YouTube titles and authors if settings say to do such.

    Worker to be run in its own thread.

    Params:
        sRequest = Shared [TitleLookupRequest] aggregate with all the state and
            context needed to look up a URL and report the results to the local terminal.
        cache = Shared cache of previous [TitleLookupRequest]s.
        delayMsecs = Milliseconds to delay before doing the lookup, to allow for
            parallel lookups without bursting all of them at once.
        descriptions = Whether or not to look up meta descriptions.
        caBundleFile = Path to a `cacert.pem` SSL certificate bundle.
 +/
void worker(
    shared TitleLookupRequest sRequest,
    shared TitleLookupResults[string] cache,
    const ulong delayMsecs,
    const Flag!"descriptions" descriptions,
    const string caBundleFile)
{
    import lu.string : beginsWith, contains, nom;
    import std.datetime.systime : Clock;
    import std.format : format;
    import std.typecons : No, Yes;
    static import kameloso.common;

    version(Posix)
    {
        import kameloso.thread : setThreadName;
        setThreadName("webtitles");
    }

    // Set the global settings so messaging functions don't segfault us
    kameloso.common.settings = &(cast()sRequest).state.settings;

    if (delayMsecs > 0)
    {
        import core.thread : Thread;
        import core.time : msecs;
        Thread.sleep(delayMsecs.msecs);
    }

    TitleLookupRequest request = cast()sRequest;
    immutable now = Clock.currTime.toUnixTime;

    if (request.url.contains("://i.imgur.com/"))
    {
        // imgur direct links naturally have no titles, but the normal pages do.
        // Rewrite and look those up instead.
        request.url = rewriteDirectImgurURL(request.url);
    }
    else if (request.url.contains("youtube.com/watch?v=") ||
        request.url.contains("youtu.be/"))
    {
        // Do our own slicing instead of using regexes, because footprint.
        // NOTE(review): the indexing below assumes the URL starts with "http[s]://";
        // presumably findURLs guarantees a scheme, else this is a range error -- TODO confirm
        string slice = request.url;

        slice.nom!(Yes.decode)("http");
        if (slice[0] == 's') slice = slice[1..$];
        slice = slice[3..$];  // ://

        if (slice.beginsWith("www.")) slice = slice[4..$];

        if (slice.beginsWith("youtube.com/watch?v=") ||
            slice.beginsWith("youtu.be/"))
        {
            import std.json : JSONException;

            try
            {
                immutable info = getYouTubeInfo(request.url, caBundleFile);

                // Let's assume all YouTube clips have titles and authors
                // Should we decode the author too?
                request.results.youtubeTitle = decodeEntities(info["title"].str);
                request.results.youtubeAuthor = info["author_name"].str;

                reportYouTubeTitle(request);

                request.results.when = now;

                // Guard concurrent cache access across worker threads
                synchronized //()
                {
                    cache[request.url] = cast(shared)request.results;
                }
                return;
            }
            catch (TitleFetchException e)
            {
                // e.code == 2 signals an arsd-level failure (e.g. missing SSL library),
                // not an HTTP status
                if (e.code == 2)
                {
                    import kameloso.constants : MagicErrorStrings;

                    if (e.msg == MagicErrorStrings.sslLibraryNotFound)
                    {
                        enum wikiPattern = cast(string)MagicErrorStrings.visitWikiOneliner;
                        enum pattern = "Error fetching YouTube title: <l>" ~
                            MagicErrorStrings.sslLibraryNotFoundRewritten ~
                            "</> <t>(is OpenSSL installed?)";
                        request.state.askToError(pattern);
                        request.state.askToError(wikiPattern);

                        version(Windows)
                        {
                            enum getoptMessage = cast(string)MagicErrorStrings.getOpenSSLSuggestion;
                            request.state.askToError(getoptMessage);
                        }
                    }
                    else
                    {
                        enum pattern = "Error fetching YouTube title: <l>%s";
                        request.state.askToError(pattern.format(e.msg));
                    }
                    return;
                }
                else if (e.code >= 400)
                {
                    // Simply failed to fetch
                    enum pattern = "Webtitles YouTube worker saw HTTP <l>%d</>: <t>%s";
                    request.state.askToError(pattern.format(e.code, e.msg));
                }
                else
                {
                    request.state.askToError("Error fetching YouTube video information: <l>" ~ e.msg);
                    //version(PrintStacktraces) request.state.askToTrace(e.info);
                    // Drop down
                }
            }
            catch (JSONException e)
            {
                request.state.askToError("Failed to parse YouTube video information: <l>" ~ e.msg);
                //version(PrintStacktraces) request.state.askToTrace(e.info);
                // Drop down
            }
            catch (Exception e)
            {
                request.state.askToError("Unexpected exception fetching YouTube video information: <l>" ~ e.msg);
                version(PrintStacktraces) request.state.askToTrace(e.toString);
                // Drop down
            }
        }
        else
        {
            // Unsure what this is really. Drop down and treat like normal link
        }
    }

    // Falls through to a plain title lookup: tries the URL as-is first, and on
    // fetch failure retries once with a trailing slash toggled.
    void tryLookup()
    {
        import std.format : format;
        import std.range : only;
        import core.exception : UnicodeException;

        foreach (immutable firstTime; only(true, false))
        {
            try
            {
                request.results = lookupTitle(request.url, descriptions, caBundleFile);
                reportTitle(request);
                request.results.when = now;

                synchronized //()
                {
                    cache[request.url] = cast(shared)request.results;
                }
            }
            catch (TitleFetchException e)
            {
                if (e.code == 2)
                {
                    import kameloso.constants : MagicErrorStrings;

                    if (e.msg == MagicErrorStrings.sslLibraryNotFound)
                    {
                        enum wikiPattern = cast(string)MagicErrorStrings.visitWikiOneliner;
                        enum pattern = "Error fetching webpage title: <l>" ~
                            MagicErrorStrings.sslLibraryNotFoundRewritten ~
                            "</> <t>(is OpenSSL installed?)";
                        request.state.askToError(pattern);
                        request.state.askToError(wikiPattern);

                        version(Windows)
                        {
                            enum getoptMessage = cast(string)MagicErrorStrings.getOpenSSLSuggestion;
                            request.state.askToError(getoptMessage);
                        }
                    }
                    else
                    {
                        enum pattern = "Error fetching webpage title: <l>%s";
                        request.state.askToError(pattern.format(e.msg));
                    }
                    return;
                }
                else if (e.code >= 400)
                {
                    // Simply failed to fetch
                    enum pattern = "Webtitles worker saw HTTP <l>%d</>: <l>%s";
                    request.state.askToWarn(pattern.format(e.code, e.msg));
                }
                else
                {
                    // No title tag found
                    enum pattern = "No title tag found: <l>%s";
                    request.state.askToWarn(pattern.format(e.msg));
                }

                if (firstTime)
                {
                    request.state.askToLog("Rewriting URL and retrying...");

                    // Toggle a trailing slash and try once more
                    if (request.url[$-1] == '/')
                    {
                        request.url = request.url[0..$-1];
                    }
                    else
                    {
                        request.url ~= '/';
                    }
                    continue;
                }
            }
            catch (UnicodeException e)
            {
                enum pattern = "Webtitles worker Unicode exception: <l>%s</> " ~
                    "(link is probably to an image or similar)";
                request.state.askToError(pattern.format(e.msg));
                //version(PrintStacktraces) request.state.askToTrace(e.info);
            }
            catch (Exception e)
            {
                request.state.askToWarn("Webtitles saw unexpected exception: <l>" ~ e.msg);
                version(PrintStacktraces) request.state.askToTrace(e.toString);
            }

            // Dropped down; end foreach by returning
            return;
        }
    }

    tryLookup();
}


// lookupTitle
/++
    Given a URL, tries to look up the web page title of it.

    Params:
        url = URL string to look up.
        descriptions = Whether or not to look up meta descriptions.
        caBundleFile = Path to a `cacert.pem` SSL certificate bundle.

    Returns:
        A finished [TitleLookupResults].

    Throws:
        [TitleFetchException] if URL could not be fetched, or if no title
        could be divined from it.
 +/
auto lookupTitle(
    const string url,
    const Flag!"descriptions" descriptions,
    const string caBundleFile)
{
    import kameloso.constants : KamelosoInfo, Timeout;
    import lu.string : beginsWith, nom;
    import arsd.dom : Document;
    import arsd.http2 : HttpClient, Uri;
    import std.algorithm.comparison : among;
    import std.array : Appender;
    import std.uni : toLower;
    import core.time : seconds;

    // No need to keep a static HttpClient since this will be in a new thread every time
    auto client = new HttpClient;
    client.useHttp11 = true;
    client.keepAlive = false;
    client.acceptGzip = false;
    client.defaultTimeout = Timeout.httpGET.seconds;  // FIXME
    client.userAgent = "kameloso/" ~ cast(string)KamelosoInfo.version_;
    if (caBundleFile.length) client.setClientCertificate(caBundleFile, caBundleFile);

    auto req = client.request(Uri(url));
    auto res = req.waitForCompletion();

    if (res.code.among!(301, 302, 307, 308))
    {
        // Moved; follow redirects, but give up after five hops
        foreach (immutable i; 0..5)
        {
            req = client.request(Uri(res.location));
            res = req.waitForCompletion();
            if (!res.code.among!(301, 302, 307, 308) || !res.location.length) break;
        }
    }

    // code 2 is a transport-level failure (see the e.code == 2 handling in worker),
    // not an HTTP status
    if ((res.code == 2) || (res.code >= 400) || !res.contentText.length)
    {
        // res.codeText among Bad Request, probably Not Found, ...
        throw new TitleFetchException(
            res.codeText,
            url,
            res.code,
            __FILE__,
            __LINE__);
    }

    auto doc = new Document;
    doc.parseGarbage("");  // Work around missing null check, causing segfaults on empty pages
    doc.parseGarbage(res.responseText);

    if (!doc.title.length)
    {
        enum message = "No title tag found";
        throw new TitleFetchException(
            message,
            url,
            res.code,
            __FILE__,
            __LINE__);
    }

    // Derive the display host from the URL itself: drop scheme, path and any "www."
    string slice = url;  // mutable
    slice.nom("//");
    string host = slice.nom!(Yes.inherit)('/').toLower;
    if (host.beginsWith("www.")) host = host[4..$];

    TitleLookupResults results;
    results.title = decodeEntities(doc.title);
    results.domain = host;

    if (descriptions)
    {
        import std.algorithm.searching : canFind;

        if (!descriptionExemptions.canFind(host))
        {
            auto metaTags = doc.getElementsByTagName("meta");

            foreach (tag; metaTags)
            {
                if (tag.name == "description")
                {
                    results.description = decodeEntities(tag.content);
                    break;
                }
            }
        }
    }

    return results;
}


// reportTitle
/++
    Echoes the result of a web title lookup to a channel.

    The outgoing line is truncated to fit within one IRC PRIVMSG.

    Params:
        request = A [TitleLookupRequest] containing the results of the lookup.
 +/
void reportTitle(TitleLookupRequest request)
{
    string line;

    if (request.results.domain.length)
    {
        import std.format : format;

        // Only add the " | description" part when there is a description
        immutable maybePipe = request.results.description.length ?
            " | " : string.init;
        enum pattern = "[<b>%s<b>] %s%s%s";
        line = pattern.format(
            request.results.domain,
            request.results.title,
            maybePipe,
            request.results.description);
    }
    else
    {
        line = request.results.title;
    }

    // "PRIVMSG #12345678901234567890123456789012345678901234567890 :".length == 61
    enum maxLen = (510-61);

    if (line.length > maxLen)
    {
        // " [...]".length == 6
        // NOTE(review): this slices by code units and may split a multi-byte
        // UTF-8 sequence at the cut point -- TODO confirm downstream tolerates that
        line = line[0..(maxLen-6)] ~ " [...]";
    }

    chan(request.state, request.event.channel, line);
}


// reportYouTubeTitle
/++
    Echoes the result of a YouTube lookup to a channel.

    Params:
        request = A [TitleLookupRequest] containing the results of the lookup.
 +/
void reportYouTubeTitle(TitleLookupRequest request)
{
    import std.format : format;

    enum pattern = "[<b>youtube.com<b>] %s (uploaded by <h>%s<h>)";
    immutable message = pattern.format(request.results.youtubeTitle, request.results.youtubeAuthor);
    chan(request.state, request.event.channel, message);
}


// rewriteDirectImgurURL
/++
    Takes a direct imgur link (one that points to an image) and rewrites it to
    instead point to the image's page.

    Images (`jpg`, `png`, ...) can naturally not have titles, but the normal pages can.

    Params:
        url = String link to rewrite.

    Returns:
        A rewritten string if it's a compatible imgur one, else the passed `url`.
648 +/ 649 auto rewriteDirectImgurURL(const string url) @safe pure 650 { 651 import lu.string : beginsWith, nom; 652 import std.typecons : No, Yes; 653 654 if (url.beginsWith("https://i.imgur.com/")) 655 { 656 immutable path = url[20..$].nom!(Yes.decode)('.'); 657 return "https://imgur.com/" ~ path; 658 } 659 else if (url.beginsWith("http://i.imgur.com/")) 660 { 661 immutable path = url[19..$].nom!(Yes.decode)('.'); 662 return "https://imgur.com/" ~ path; 663 } 664 665 return url; 666 } 667 668 /// 669 unittest 670 { 671 { 672 immutable directURL = "https://i.imgur.com/URHe5og.jpg"; 673 immutable rewritten = rewriteDirectImgurURL(directURL); 674 assert((rewritten == "https://imgur.com/URHe5og"), rewritten); 675 } 676 { 677 immutable directURL = "http://i.imgur.com/URHe5og.jpg"; 678 immutable rewritten = rewriteDirectImgurURL(directURL); 679 assert((rewritten == "https://imgur.com/URHe5og"), rewritten); 680 } 681 } 682 683 684 // getYouTubeInfo 685 /++ 686 Fetches the JSON description of a YouTube video link, allowing us to report 687 it the page's title without having to actually fetch the video page. 688 689 Example: 690 --- 691 auto info = getYouTubeInfo("https://www.youtube.com/watch?v=s-mOy8VUEBk"); 692 writeln(info["title"].str); 693 writeln(info["author"].str); 694 --- 695 696 Params: 697 url = A YouTube video link string. 698 caBundleFile = Path to a `cacert.pem` SSL certificate bundle. 699 700 Returns: 701 A [std.json.JSONValue|JSONValue] with fields describing the looked-up video. 702 703 Throws: 704 [TitleFetchException] if the YouTube ID was invalid and could not be queried. 705 706 [std.json.JSONException|JSONException] if the JSON response could not be parsed. 
707 +/ 708 auto getYouTubeInfo(const string url, const string caBundleFile) 709 { 710 import kameloso.constants : KamelosoInfo, Timeout; 711 import arsd.http2 : HttpClient, Uri; 712 import std.algorithm.comparison : among; 713 import std.array : Appender; 714 import std.exception : assumeUnique; 715 import std.json : parseJSON; 716 import core.time : seconds; 717 718 // No need to keep a static HttpClient since this will be in a new thread every time 719 auto client = new HttpClient; 720 client.useHttp11 = true; 721 client.keepAlive = false; 722 client.acceptGzip = false; 723 client.defaultTimeout = Timeout.httpGET.seconds; // FIXME 724 client.userAgent = "kameloso/" ~ cast(string)KamelosoInfo.version_; 725 if (caBundleFile.length) client.setClientCertificate(caBundleFile, caBundleFile); 726 727 immutable youtubeURL = "https://www.youtube.com/oembed?format=json&url=" ~ url; 728 729 auto req = client.request(Uri(youtubeURL)); 730 auto res = req.waitForCompletion(); 731 732 if (res.code.among!(301, 302, 307, 308)) 733 { 734 // Moved 735 foreach (immutable i; 0..5) 736 { 737 req = client.request(Uri(res.location)); 738 res = req.waitForCompletion(); 739 if (!res.code.among!(301, 302, 307, 308) || !res.location.length) break; 740 } 741 } 742 743 if ((res.code == 2) || (res.code >= 400) || !res.contentText.length) 744 { 745 // res.codeText among Bad Request, probably Not Found, ... 746 throw new TitleFetchException(res.codeText, url, res.code, __FILE__, __LINE__); 747 } 748 749 return parseJSON(res.contentText); 750 } 751 752 753 // TitleFetchException 754 /++ 755 A normal [object.Exception|Exception] but with an HTTP status code attached. 756 +/ 757 final class TitleFetchException : Exception 758 { 759 @safe: 760 /// The URL that was attempted to fetch the title of. 761 string url; 762 763 /// The HTTP status code that was returned when attempting to fetch a title. 
764 uint code; 765 766 /++ 767 Create a new [TitleFetchException], attaching a URL and an HTTP status code. 768 +/ 769 this( 770 const string message, 771 const string url, 772 const uint code, 773 const string file = __FILE__, 774 const size_t line = __LINE__, 775 Throwable nextInChain = null) pure nothrow @nogc @safe 776 { 777 this.code = code; 778 super(message, file, line, nextInChain); 779 } 780 781 /++ 782 Create a new [TitleFetchException], without attaching anything. 783 +/ 784 this( 785 const string message, 786 const string file = __FILE__, 787 const size_t line = __LINE__, 788 Throwable nextInChain = null) pure nothrow @nogc @safe 789 { 790 super(message, file, line, nextInChain); 791 } 792 } 793 794 795 // decodeEntities 796 /++ 797 Removes unwanted characters from a string, and decodes HTML entities in it 798 (like `—` and ` `). 799 800 Params: 801 title = Title string to decode entities and remove tags from. 802 803 Returns: 804 A modified string, with unwanted bits stripped out and/or decoded. 
805 +/ 806 auto decodeEntities(const string line) 807 { 808 import lu.string : stripped; 809 import arsd.dom : htmlEntitiesDecode; 810 import std.array : replace; 811 812 return line 813 .replace("\r", string.init) 814 .replace('\n', ' ') 815 .stripped 816 .htmlEntitiesDecode(); 817 } 818 819 /// 820 unittest 821 { 822 immutable t1 = ""Hello world!""; 823 immutable t1p = decodeEntities(t1); 824 assert((t1p == "\"Hello\u00A0world!\""), t1p); // not a normal space 825 826 immutable t2 = "</title>"; 827 immutable t2p = decodeEntities(t2); 828 assert((t2p == "</title>"), t2p); 829 830 immutable t3 = "—µ´¥€"; 831 immutable t3p = decodeEntities(t3); 832 assert((t3p == "—µ´¥€"), t3p); // not a normal dash 833 834 immutable t4 = ""Señor Þ" ©2017"; 835 immutable t4p = decodeEntities(t4); 836 assert((t4p == `"Señor Þ" ©2017`), t4p); 837 838 immutable t5 = "\n Nyheter - NSD.se \n"; 839 immutable t5p = decodeEntities(t5); 840 assert(t5p == "Nyheter - NSD.se"); 841 } 842 843 844 // prune 845 /++ 846 Garbage-collects old entries in a `TitleLookupResults[string]` lookup cache. 847 848 Params: 849 cache = Cache of previous [TitleLookupResults], `shared` so that it can 850 be reused in further lookup (other threads). 851 expireSeconds = After how many seconds a cached entry is considered to 852 have expired and should no longer be used as a valid entry. 853 +/ 854 void prune(shared TitleLookupResults[string] cache, const uint expireSeconds) 855 { 856 import lu.objmanip : pruneAA; 857 import std.datetime.systime : Clock; 858 859 if (!cache.length) return; 860 861 immutable now = Clock.currTime.toUnixTime; 862 863 synchronized //() 864 { 865 pruneAA!((entry) => (now - entry.when) > expireSeconds)(cache); 866 } 867 } 868 869 870 // initialise 871 /++ 872 Initialises the shared cache, else it won't retain changes. 873 874 Just assign an entry and remove it. 
875 +/ 876 void initialise(WebtitlesPlugin plugin) 877 { 878 // No need to synchronise this; no worker threads are running 879 plugin.cache[string.init] = TitleLookupResults.init; 880 plugin.cache.remove(string.init); 881 } 882 883 884 mixin MinimalAuthentication; 885 mixin PluginRegistration!WebtitlesPlugin; 886 887 public: 888 889 890 // WebtitlesPlugin 891 /++ 892 The Webtitles plugin catches HTTP URL links in messages, connects to 893 their servers and and streams the web page itself, looking for the web page's 894 title. This is then reported to the originating channel or personal query. 895 +/ 896 final class WebtitlesPlugin : IRCPlugin 897 { 898 private: 899 /// All Webtitles options gathered. 900 WebtitlesSettings webtitlesSettings; 901 902 /// Cache of recently looked-up web titles. 903 shared TitleLookupResults[string] cache; 904 905 /++ 906 How long before a cached title lookup expires and its address has to be 907 looked up anew. 908 +/ 909 enum expireSeconds = 600; 910 911 /// In the case of chained URL lookups, how many milliseconds to delay each lookup by. 912 enum delayMsecs = 100; 913 914 /++ 915 How big a buffer to initially allocate when downloading web pages to get 916 their titles. 917 +/ 918 enum lookupBufferSize = 8192; 919 920 mixin IRCPluginImpl; 921 }