String line to examine and find URLs in.
A string[] array of found URLs. These include fragment identifiers.
// Replaces the following:
// enum stephenhay = `\bhttps?://[^\s/$.?#].[^\s]*`;
// static urlRegex = ctRegex!stephenhay;
string[] urls = findURLs("blah https://google.com http://facebook.com httpx://wefpokwe");
assert(urls.length == 2);
import std.conv : to;

// Test cases for findURLs. Each case sits in its own scope so that `urls`
// can be redeclared, and every assert passes the offending value as its
// message so a failure shows what was actually returned.

{
    // A line consisting of a single URL yields exactly that URL.
    const urls = findURLs("http://google.com");
    assert((urls.length == 1), urls.to!string);
    assert((urls[0] == "http://google.com"), urls[0]);
}
{
    // "shttps://c" must not be reported; the other three must be, in order.
    const urls = findURLs("blah https://a.com http://b.com shttps://c https://d.asdf.asdf.asdf ");
    assert((urls.length == 3), urls.to!string);
    assert((urls == [ "https://a.com", "http://b.com", "https://d.asdf.asdf.asdf" ]), urls.to!string);
}
{
    // Several malformed candidates; exactly one hit is expected overall.
    const urls = findURLs("http:// http://asdf https:// asdfhttpasdf http://google.com");
    assert((urls.length == 1), urls.to!string);
}
{
    // Candidates run together without separating whitespace yield nothing.
    const urls = findURLs("http://a.sehttp://a.shttp://a.http://http:");
    assert(!urls.length, urls.to!string);
}
{
    // Non-ASCII characters in the host must not prevent a hit.
    const urls = findURLs("blahblah https://motorbörsen.se blhblah");
    assert(urls.length, urls.to!string);
}
{
    // Let dlang-requests attempt complex URLs, don't validate more than necessary
    const urls = findURLs("blahblah https://高所恐怖症。co.jp blhblah");
    assert(urls.length, urls.to!string);
}
{
    // URLs followed by punctuation: every reported hit must be the bare URL.
    const urls = findURLs("nyaa is now at https://nyaa.si, https://nyaa.si? " ~
        "https://nyaa.si. https://nyaa.si! and you should use it https://nyaa.si:");

    // Guard against a vacuous pass: without this, an empty result would skip
    // the loop below and the case would succeed without checking anything.
    assert(urls.length, urls.to!string);

    foreach (immutable url; urls)
    {
        assert((url == "https://nyaa.si"), url);
    }
}
{
    // An "httpx://" candidate between two valid URLs is skipped.
    const urls = findURLs("https://google.se httpx://google.se https://google.se");
    assert((urls == [ "https://google.se", "https://google.se" ]), urls.to!string);
}
{
    // A bare "https://" scheme followed by a space is not a URL.
    const urls = findURLs("https:// ");
    assert(!urls.length, urls.to!string);
}
{
    // Likewise for a bare "http://" scheme.
    const urls = findURLs("http:// ");
    assert(!urls.length, urls.to!string);
}
Finds URLs in a string, returning an array of them. Does not filter out duplicates.
Replacement for regex matching; uses much less memory when compiling (around 300 MB).
To consider: does this need a dstring?