'use strict'; Object.defineProperty(exports, '__esModule', { value: true }); // THIS FILE IS AUTOMATICALLY GENERATED DO NOT EDIT DIRECTLY // See update-tlds.js for encoding/decoding format // https://data.iana.org/TLD/tlds-alpha-by-domain.txt const encodedTlds = 'aaa1rp3bb0ott3vie4c1le2ogado5udhabi7c0ademy5centure6ountant0s9o1tor4d0s1ult4e0g1ro2tna4f0l1rica5g0akhan5ency5i0g1rbus3force5tel5kdn3l0ibaba4pay4lfinanz6state5y2sace3tom5m0azon4ericanexpress7family11x2fam3ica3sterdam8nalytics7droid5quan4z2o0l2partments8p0le4q0uarelle8r0ab1mco4chi3my2pa2t0e3s0da2ia2sociates9t0hleta5torney7u0ction5di0ble3o3spost5thor3o0s4vianca6w0s2x0a2z0ure5ba0by2idu3namex3narepublic11d1k2r0celona5laycard4s5efoot5gains6seball5ketball8uhaus5yern5b0c1t1va3cg1n2d1e0ats2uty4er2ntley5rlin4st0buy5t2f1g1h0arti5i0ble3d1ke2ng0o3o1z2j1lack0friday9ockbuster8g1omberg7ue3m0s1w2n0pparibas9o0ats3ehringer8fa2m1nd2o0k0ing5sch2tik2on4t1utique6x2r0adesco6idgestone9oadway5ker3ther5ussels7s1t1uild0ers6siness6y1zz3v1w1y1z0h3ca0b1fe2l0l1vinklein9m0era3p2non3petown5ital0one8r0avan4ds2e0er0s4s2sa1e1h1ino4t0ering5holic7ba1n1re3c1d1enter4o1rn3f0a1d2g1h0anel2nel4rity4se2t2eap3intai5ristmas6ome4urch5i0priani6rcle4sco3tadel4i0c2y3k1l0aims4eaning6ick2nic1que6othing5ud3ub0med6m1n1o0ach3des3ffee4llege4ogne5m0cast4mbank4unity6pany2re3uter5sec4ndos3struction8ulting7tact3ractors9oking4l1p2rsica5untry4pon0s4rses6pa2r0edit0card4union9icket5own3s1uise0s6u0isinella9v1w1x1y0mru3ou3z2dabur3d1nce3ta1e1ing3sun4y2clk3ds2e0al0er2s3gree4livery5l1oitte5ta3mocrat6ntal2ist5si0gn4v2hl2iamonds6et2gital5rect0ory7scount3ver5h2y2j1k1m1np2o0cs1tor4g1mains5t1wnload7rive4tv2ubai3nlop4pont4rban5vag2r2z2earth3t2c0o2deka3u0cation8e1g1mail3erck5nergy4gineer0ing9terprises10pson4quipment8r0icsson6ni3s0q1tate5t1u0rovision8s2vents5xchange6pert3osed4ress5traspace10fage2il1rwinds6th3mily4n0s2rm0ers5shion4t3edex3edback6rrari3ero6i0delity5o2lm2nal1nce1ial7re0stone6mdale6sh0ing5t0ness6j1k1lickr3ghts4r2orist4wers5y2m1o0o0d1tball6rd1ex2sale4um3undation8x2r0ee1seni
us7l1ogans4ntier7tr2ujitsu5n0d2rniture7tbol5yi3ga0l0lery3o1up4me0s3p1rden4y2b0iz3d0n2e0a1nt0ing5orge5f1g0ee3h1i0ft0s3ves2ing5l0ass3e1obal2o4m0ail3bh2o1x2n1odaddy5ld0point6f2o0dyear5g0le4p1t1v2p1q1r0ainger5phics5tis4een3ipe3ocery4up4s1t1u0ardian6cci3ge2ide2tars5ru3w1y2hair2mburg5ngout5us3bo2dfc0bank7ealth0care8lp1sinki6re1mes5iphop4samitsu7tachi5v2k0t2m1n1ockey4ldings5iday5medepot5goods5s0ense7nda3rse3spital5t0ing5t0els3mail5use3w2r1sbc3t1u0ghes5yatt3undai7ibm2cbc2e1u2d1e0ee3fm2kano4l1m0amat4db2mo0bilien9n0c1dustries8finiti5o2g1k1stitute6urance4e4t0ernational10uit4vestments10o1piranga7q1r0ish4s0maili5t0anbul7t0au2v3jaguar4va3cb2e0ep2tzt3welry6io2ll2m0p2nj2o0bs1urg4t1y2p0morgan6rs3uegos4niper7kaufen5ddi3e0rryhotels6logistics9properties14fh2g1h1i0a1ds2m1ndle4tchen5wi3m1n1oeln3matsu5sher5p0mg2n2r0d1ed3uokgroup8w1y0oto4z2la0caixa5mborghini8er3ncaster6d0rover6xess5salle5t0ino3robe5w0yer5b1c1ds2ease3clerc5frak4gal2o2xus4gbt3i0dl2fe0insurance9style7ghting6ke2lly3mited4o2ncoln4k2psy3ve1ing5k1lc1p2oan0s3cker3us3l1ndon4tte1o3ve3pl0financial11r1s1t0d0a3u0ndbeck6xe1ury5v1y2ma0drid4if1son4keup4n0agement7go3p1rket0ing3s4riott5shalls7ttel5ba2c0kinsey7d1e0d0ia3et2lbourne7me1orial6n0u2rckmsd7g1h1iami3crosoft7l1ni1t2t0subishi9k1l0b1s2m0a2n1o0bi0le4da2e1i1m1nash3ey2ster5rmon3tgage6scow4to0rcycles9v0ie4p1q1r1s0d2t0n1r2u0seum3ic4v1w1x1y1z2na0b1goya4me2tura4vy3ba2c1e0c1t0bank4flix4work5ustar5w0s2xt0direct7us4f0l2g0o2hk2i0co2ke1on3nja3ssan1y5l1o0kia3rton4w0ruz3tv4p1r0a1w2tt2u1yc2z2obi1server7ffice5kinawa6layan0group9dnavy5lo3m0ega4ne1g1l0ine5oo2pen3racle3nge4g0anic5igins6saka4tsuka4t2vh3pa0ge2nasonic7ris2s1tners4s1y3y2ccw3e0t2f0izer5g1h0armacy6d1ilips5one2to0graphy6s4ysio5ics1tet2ures6d1n0g1k2oneer5zza4k1l0ace2y0station9umbing5s3m1n0c2ohl2ker3litie5rn2st3r0america6xi3ess3ime3o0d0uctions8f1gressive8mo2perties3y5tection8u0dential9s1t1ub2w0c2y2qa1pon3uebec3st5racing4dio4e0ad1lestate6tor2y4cipes5d0stone5umbrella9hab3ise0n3t2liance6n0t0als5pair3ort3ublican8st0aurant8view0s5xroth6ich0ardli6oh3l1
o1p2o0cks3deo3gers4om3s0vp3u0gby3hr2n2w0e2yukyu6sa0arland6fe0ty4kura4le1on3msclub4ung5ndvik0coromant12ofi4p1rl2s1ve2xo3b0i1s2c0a1b1haeffler7midt4olarships8ol3ule3warz5ience5ot3d1e0arch3t2cure1ity6ek2lect4ner3rvices6ven3w1x0y3fr2g1h0angrila6rp2w2ell3ia1ksha5oes2p0ping5uji3w3i0lk2na1gles5te3j1k0i0n2y0pe4l0ing4m0art3ile4n0cf3o0ccer3ial4ftbank4ware6hu2lar2utions7ng1y2y2pa0ce3ort2t3r0l2s1t0ada2ples4r1tebank4farm7c0group6ockholm6rage3e3ream4udio2y3yle4u0cks3pplies3y2ort5rf1gery5zuki5v1watch4iss4x1y0dney4stems6z2tab1ipei4lk2obao4rget4tamotors6r2too4x0i3c0i2d0k2eam2ch0nology8l1masek5nnis4va3f1g1h0d1eater2re6iaa2ckets5enda4ps2res2ol4j0maxx4x2k0maxx5l1m0all4n1o0day3kyo3ols3p1ray3shiba5tal3urs3wn2yota3s3r0ade1ing4ining5vel0ers0insurance16ust3v2t1ube2i1nes3shu4v0s2w1z2ua1bank3s2g1k1nicom3versity8o2ol2ps2s1y1z2va0cations7na1guard7c1e0gas3ntures6risign5mögensberater2ung14sicherung10t2g1i0ajes4deo3g1king4llas4n1p1rgin4sa1ion4va1o3laanderen9n1odka3lvo3te1ing3o2yage5u2wales2mart4ter4ng0gou5tch0es6eather0channel12bcam3er2site5d0ding5ibo2r3f1hoswho6ien2ki2lliamhill9n0dows4e1ners6me2olterskluwer11odside6rk0s2ld3w2s1tc1f3xbox3erox4finity6ihuan4n2xx2yz3yachts4hoo3maxun5ndex5e1odobashi7ga2kohama6u0tube6t1un3za0ppos4ra3ero3ip2m1one3uerich6w2'; // Internationalized domain names containing non-ASCII const encodedUtlds = 'ελ1υ2бг1ел3дети4ею2католик6ом3мкд2он1сква6онлайн5рг3рус2ф2сайт3рб3укр3қаз3հայ3ישראל5קום3ابوظبي5رامكو5لاردن4بحرين5جزائر5سعودية6عليان5مغرب5مارات5یران5بارت2زار4يتك3ھارت5تونس4سودان3رية5شبكة4عراق2ب2مان4فلسطين6قطر3كاثوليك6وم3مصر2ليسيا5وريتانيا7قع4همراه5پاکستان7ڀارت4कॉम3नेट3भारत0म्3ोत5संगठन5বাংলা5ভারত2ৰত4ਭਾਰਤ4ભારત4ଭାରତ4இந்தியா6லங்கை6சிங்கப்பூர்11భారత్5ಭಾರತ4ഭാരതം5ලංකා4คอม3ไทย3ລາວ3გე2みんな3アマゾン4クラウド4グーグル4コム2ストア3セール3ファッション6ポイント4世界2中信1国1國1文网3亚马逊3企业2佛山2信息2健康2八卦2公司1益2台湾1灣2商城1店1标2嘉里0大酒店5在线2大拿2天主教3娱乐2家電2广东2微博2慈善2我爱你3手机2招聘2政务1府2新加坡2闻2时尚2書籍2机构2淡马锡3游戏2澳門2点看2移动2组织机构4网址1店1站1络2联通2谷歌2购物2通販2集团2電訊盈科4飞利浦3食品2餐厅2香格里拉3港2닷넷1컴2삼성2한국2'; /** * @template A * @template B * @param {A} target * @param {B} 
/**
 * Copy every enumerable property of `properties` (own or inherited, as
 * visited by `for...in`) onto `target`, overwriting keys that already exist.
 *
 * @template A
 * @template B
 * @param {A} target object that receives the properties (mutated in place)
 * @param {B} properties object whose properties are copied; may be
 * null/undefined, in which case nothing is copied
 * @return {A & B} the same `target` reference
 */
const assign = (target, properties) => {
  for (const prop in properties) {
    target[prop] = properties[prop];
  }
  return target;
};

/**
 * Finite State Machine generation utilities
 */

/**
 * @template T
 * @typedef {{ [group: string]: T[] }} Collections
 */

/**
 * @typedef {{ [group: string]: true }} Flags
 */

// Names of the groups (keys) used in scanner Collections instances
const numeric = 'numeric';
const ascii = 'ascii';
const alpha = 'alpha';
const asciinumeric = 'asciinumeric';
const alphanumeric = 'alphanumeric';
const domain = 'domain';
const emoji = 'emoji';
const scheme = 'scheme';
const slashscheme = 'slashscheme';
const whitespace = 'whitespace';

/**
 * Look up the token list registered under `name`, creating an empty list
 * first when the group does not exist yet.
 *
 * @template T
 * @param {string} name group name
 * @param {Collections<T>} groups collections object to register in
 * @returns {T[]} current (live) list of tokens in the given group
 */
function registerGroup(name, groups) {
  const alreadyRegistered = name in groups;
  if (!alreadyRegistered) {
    groups[name] = [];
  }
  return groups[name];
}

/**
 * Add token `t` to every group named by a key of `flags`.
 *
 * Before registering, `flags` is expanded IN PLACE with the groups implied by
 * the ones already set (numeric => asciinumeric + alphanumeric, ascii =>
 * asciinumeric + alpha, asciinumeric/alpha => alphanumeric, alphanumeric or
 * emoji => domain). Note that the registration loop visits every key of
 * `flags`, whatever its value.
 *
 * @template T
 * @param {T} t token to add
 * @param {Flags} flags groups the token belongs to (mutated)
 * @param {Collections<T>} groups collections object to update
 */
function addToGroups(t, flags, groups) {
  // Evaluated top to bottom so that a flag set by an earlier rule can trigger
  // a later one (e.g. numeric -> asciinumeric -> alphanumeric -> domain).
  const implications = [
    [numeric, [asciinumeric, alphanumeric]],
    [ascii, [asciinumeric, alpha]],
    [asciinumeric, [alphanumeric]],
    [alpha, [alphanumeric]],
    [alphanumeric, [domain]],
    [emoji, [domain]],
  ];
  for (let i = 0; i < implications.length; i++) {
    const source = implications[i][0];
    const implied = implications[i][1];
    if (flags[source]) {
      for (let j = 0; j < implied.length; j++) {
        flags[implied[j]] = true;
      }
    }
  }

  for (const groupName in flags) {
    const members = registerGroup(groupName, groups);
    if (members.indexOf(t) === -1) {
      members.push(t);
    }
  }
}

/**
 * Collect the names of all groups whose list contains token `t`.
 *
 * @template T
 * @param {T} t token to check
 * @param {Collections<T>} groups collections object to search
 * @returns {Flags} one `true` entry per group that contains the token
 */
function flagsForToken(t, groups) {
  const found = {};
  for (const groupName in groups) {
    if (groups[groupName].indexOf(t) !== -1) {
      found[groupName] = true;
    }
  }
  return found;
}

/**
 * @template T
 * @typedef {null | T } Transition
 */
/**
 * A single state of the state machine.
 *
 * - `j`  maps an exact input (character or token type) to the next state
 * - `jr` is an ordered list of `[RegExp, State]` transitions tried when no
 *        exact match exists
 * - `jd` is the default state used when nothing else matches
 * - `t`  is the token emitted when the machine stops here; a state whose `t`
 *        is falsy is non-accepting and emits nothing
 *
 * The template type T represents the type of the token this state accepts.
 * This should be a string (such as of the token exports in `text.js`) or a
 * MultiToken subclass (from `multi.js`)
 *
 * @template T
 * @param {T} [token] Token that this state emits
 */
function State(token) {
  // this.n = null; // DEBUG: State name

  /** @type {{ [input: string]: State }} j */
  this.j = {};

  /** @type {[RegExp, State][]} jr */
  this.jr = [];

  /** @type {?State} jd */
  this.jd = null;

  /** @type {?T} t */
  this.t = token === undefined ? null : token;
}

/**
 * Scanner token groups; used by `tr`/`tt` whenever the caller does not pass
 * an explicit `groups` argument.
 * @type Collections
 */
State.groups = {};

State.prototype = {
  /**
   * @returns {boolean} whether this state emits a token
   */
  accepts() {
    return Boolean(this.t);
  },

  /**
   * Follow an existing transition from the given input to the next state.
   * Does not mutate.
   *
   * Lookup order: exact match in `j`, then the first `jr` entry that has a
   * truthy target state AND whose regex matches, then the default `jd`.
   * `jr` entries with an empty target are skipped.
   *
   * @param {string} input character or token type to transition on
   * @returns {?State} the next state, if any
   */
  go(input) {
    const exact = this.j[input];
    if (exact) {
      return exact;
    }

    const regexTransitions = this.jr;
    for (let k = 0; k < regexTransitions.length; k++) {
      const entry = regexTransitions[k];
      const target = entry[1];
      // The target is checked first so the regex is never run for empty slots
      if (target && entry[0].test(input)) {
        return target;
      }
    }

    // Nothing matched explicitly: fall back to the default, if any
    return this.jd;
  },

  /**
   * Whether the state has a transition for the given input. Pass `true` as
   * the second argument to only look for an exact-match key in `j` (ignoring
   * regular-expression and default transitions).
   *
   * @param {string} input
   * @param {boolean} [exactOnly]
   * @returns {boolean}
   */
  has(input, exactOnly) {
    if (exactOnly === undefined) {
      exactOnly = false;
    }
    if (exactOnly) {
      return input in this.j;
    }
    return Boolean(this.go(input));
  },

  /**
   * Short for "transition all"; create a transition from each item of the
   * given list to the same resulting state/token (one `tt` call per item).
   *
   * @param {string | string[]} inputs Group of inputs to transition on
   * @param {Transition | State} [next] Transition options
   * @param {Flags} [flags] Collections flags to add token to
   * @param {Collections} [groups] Master list of token groups
   */
  ta(inputs, next, flags, groups) {
    const count = inputs.length;
    let k = 0;
    while (k < count) {
      this.tt(inputs[k], next, flags, groups);
      k += 1;
    }
  },

  /**
   * Short for "take regexp transition"; appends a transition that is taken
   * when the input matches the given regular expression.
   *
   * When `next` is an existing state (anything with a `j` property) it is
   * used as-is. Otherwise a new state emitting `next` is created and, if
   * flags are given, the token is registered in the matching groups.
   *
   * @param {RegExp} regexp Regular expression transition (populate first)
   * @param {T | State} [next] Transition options
   * @param {Flags} [flags] Collections flags to add token to
   * @param {Collections} [groups] Master list of token groups
   * @returns {State} taken after the given input
   */
  tr(regexp, next, flags, groups) {
    const registry = groups || State.groups;
    const isExistingState = next && next.j;

    let target = next;
    if (!isExistingState) {
      // `next` is a token (or nothing): wrap it in a fresh state
      target = new State(next);
      if (flags && registry) {
        addToGroups(next, flags, registry);
      }
    }

    this.jr.push([regexp, target]);
    return target;
  },

  /**
   * Short for "take transitions"; takes one `tt` transition per element of
   * `input` and returns the final state. Only the last transition receives
   * `next`, `flags` and `groups`. Empty input returns this state unchanged.
   *
   * @param {string | string[]} input
   * @param {T | State} [next] Transition options
   * @param {Flags} [flags] Collections flags to add token to
   * @param {Collections} [groups] Master list of token groups
   * @returns {State} taken after the given input
   */
  ts(input, next, flags, groups) {
    const size = input.length;
    if (!size) {
      return this;
    }

    const lastIndex = size - 1;
    let cursor = this;
    for (let k = 0; k < lastIndex; k++) {
      cursor = cursor.tt(input[k]);
    }
    return cursor.tt(input[lastIndex], next, flags, groups);
  },

  /**
   * Short for "take transition", this is a method for building/working with
   * state machines.
   *
   * If a state is given for the second argument, that state will be
   * transitioned to on the given input regardless of what that input
   * previously did.
   *
   * Otherwise a NEW state is always created and installed for the input. If
   * the input currently leads somewhere (exact, regex or default), the new
   * state starts as a shallow copy of that destination (`j`, `jr`, `jd`,
   * `t`), so it keeps resembling the existing default transitions.
   *
   * If a token is specified, the new state will emit that token. When the
   * copied state already emitted a string token, the new token is also added
   * to every group that old token belongs to, in addition to `flags`.
   *
   * @param {string} input character, token type to transition on
   * @param {T | State} [next] Transition options
   * @param {Flags} [flags] Collections flags to add token to
   * @param {Collections} [groups] Master list of groups
   * @returns {State} taken after the given input
   */
  tt(input, next, flags, groups) {
    const registry = groups || State.groups;

    // An existing state was supplied: plain re-wiring, nothing to create
    if (next && next.j) {
      this.j[input] = next;
      return next;
    }

    const token = next;
    const created = new State();

    // Use whatever the input currently resolves to as a template
    const template = this.go(input);
    if (template) {
      assign(created.j, template.j);
      created.jr.push(...template.jr);
      created.jd = template.jd;
      created.t = template.t;
    }

    if (token) {
      if (registry) {
        const inherited = created.t;
        if (inherited && typeof inherited === 'string') {
          // Keep the new token in the same groups as the one it replaces
          const merged = assign(flagsForToken(inherited, registry), flags);
          addToGroups(token, merged, registry);
        } else if (flags) {
          addToGroups(token, flags, registry);
        }
      }
      created.t = token; // overwrite anything that was previously there
    }

    this.j[input] = created;
    return created;
  }
};

// Helper functions to improve minification (not exported outside linkifyjs module)

/**
 * Function form of `state.ta(input, next, flags, groups)`.
 *
 * @template T
 * @param {State} state
 * @param {string | string[]} input
 * @param {T | State} [next]
 * @param {Flags} [flags]
 * @param {Collections} [groups]
 */
const ta = (state, input, next, flags, groups) => {
  return state.ta(input, next, flags, groups);
};

/**
 * Function form of `state.tr(regexp, next, flags, groups)`.
 *
 * @template T
 * @param {State} state
 * @param {RegExp} regexp
 * @param {T | State} [next]
 * @param {Flags} [flags]
 * @param {Collections} [groups]
 * @returns {State}
 */
const tr = (state, regexp, next, flags, groups) => {
  return state.tr(regexp, next, flags, groups);
};
/**
 * Function form of `state.ts(input, next, flags, groups)`.
 *
 * @template T
 * @param {State} state
 * @param {string | string[]} input
 * @param {T | State} [next]
 * @param {Flags} [flags]
 * @param {Collections} [groups]
 */
const ts = (state, input, next, flags, groups) => {
  return state.ts(input, next, flags, groups);
};

/**
 * Function form of `state.tt(input, next, flags, groups)`.
 *
 * @template T
 * @param {State} state
 * @param {string} input
 * @param {T | State} [next]
 * @param {Flags} [flags]
 * @param {Collections} [groups]
 */
const tt = (state, input, next, flags, groups) => {
  return state.tt(input, next, flags, groups);
};

/******************************************************************************
Text Tokens
Identifiers for token outputs from the regexp scanner
******************************************************************************/

// --- Words and domains ---
const WORD = 'WORD'; // a valid web domain token; only contains a-z
const UWORD = 'UWORD'; // contains letters other than a-z, used for IDN
const LOCALHOST = 'LOCALHOST'; // special case of word
const TLD = 'TLD'; // valid top-level domain, special case of WORD (see tlds.js)
const UTLD = 'UTLD'; // valid IDN TLD, special case of UWORD (see tlds.js)

// --- Schemes ---
// The scheme portion of a web URI protocol. Supported types include: `mailto`,
// `file`, and user-defined custom protocols. Limited to schemes that contain
// only letters
const SCHEME = 'SCHEME';
// Like SCHEME, but for schemes that must always be followed by `://`, not just
// `:`. Supported types include `http`, `https`, `ftp`, `ftps`
const SLASH_SCHEME = 'SLASH_SCHEME';

// --- Numbers and whitespace ---
const NUM = 'NUM'; // any sequence of digits 0-9
const WS = 'WS'; // any number of consecutive whitespace characters that are not newline
const NL$1 = 'NL'; // new line (unix style), \n

// --- Opening/closing bracket classes ---
// TODO: Rename OPEN -> LEFT and CLOSE -> RIGHT in v5 to fit with Unicode names
// Also rename angle brackets to LESSTHAN and GREATERTHAN
const OPENBRACE = 'OPENBRACE'; // {
const CLOSEBRACE = 'CLOSEBRACE'; // }
const OPENBRACKET = 'OPENBRACKET'; // [
const CLOSEBRACKET = 'CLOSEBRACKET'; // ]
const OPENPAREN = 'OPENPAREN'; // (
const CLOSEPAREN = 'CLOSEPAREN'; // )
const OPENANGLEBRACKET = 'OPENANGLEBRACKET'; // <
const CLOSEANGLEBRACKET = 'CLOSEANGLEBRACKET'; // >
const FULLWIDTHLEFTPAREN = 'FULLWIDTHLEFTPAREN'; // fullwidth left parenthesis (U+FF08)
const FULLWIDTHRIGHTPAREN = 'FULLWIDTHRIGHTPAREN'; // fullwidth right parenthesis (U+FF09)
const LEFTCORNERBRACKET = 'LEFTCORNERBRACKET'; // 「
const RIGHTCORNERBRACKET = 'RIGHTCORNERBRACKET'; // 」
const LEFTWHITECORNERBRACKET = 'LEFTWHITECORNERBRACKET'; // 『
const RIGHTWHITECORNERBRACKET = 'RIGHTWHITECORNERBRACKET'; // 』
const FULLWIDTHLESSTHAN = 'FULLWIDTHLESSTHAN'; // fullwidth less-than sign (U+FF1C)
const FULLWIDTHGREATERTHAN = 'FULLWIDTHGREATERTHAN'; // fullwidth greater-than sign (U+FF1E)

// --- Various symbols ---
const AMPERSAND = 'AMPERSAND'; // &
const APOSTROPHE = 'APOSTROPHE'; // '
const ASTERISK = 'ASTERISK'; // *
const AT = 'AT'; // @
const BACKSLASH = 'BACKSLASH'; // \
const BACKTICK = 'BACKTICK'; // `
const CARET = 'CARET'; // ^
const COLON = 'COLON'; // :
const COMMA = 'COMMA'; // ,
const DOLLAR = 'DOLLAR'; // $
const DOT = 'DOT'; // .
const EQUALS = 'EQUALS'; // =
const EXCLAMATION = 'EXCLAMATION'; // !
const HYPHEN = 'HYPHEN'; // -
const PERCENT = 'PERCENT'; // %
const PIPE = 'PIPE'; // |
const PLUS = 'PLUS'; // +
const POUND = 'POUND'; // #
const QUERY = 'QUERY'; // ?
const QUOTE = 'QUOTE'; // "
const SEMI = 'SEMI'; // ;
const SLASH = 'SLASH'; // /
const TILDE = 'TILDE'; // ~
const UNDERSCORE = 'UNDERSCORE'; // _

// --- Emoji symbol ---
const EMOJI$1 = 'EMOJI';

// --- Default token: anything that is not one of the above ---
const SYM = 'SYM';

// Frozen, prototype-less namespace of every token name above. Each key maps to
// the identically named string.
var tk = /*#__PURE__*/Object.freeze({
  __proto__: null,
  WORD,
  UWORD,
  LOCALHOST,
  TLD,
  UTLD,
  SCHEME,
  SLASH_SCHEME,
  NUM,
  WS,
  NL: NL$1,
  OPENBRACE,
  CLOSEBRACE,
  OPENBRACKET,
  CLOSEBRACKET,
  OPENPAREN,
  CLOSEPAREN,
  OPENANGLEBRACKET,
  CLOSEANGLEBRACKET,
  FULLWIDTHLEFTPAREN,
  FULLWIDTHRIGHTPAREN,
  LEFTCORNERBRACKET,
  RIGHTCORNERBRACKET,
  LEFTWHITECORNERBRACKET,
  RIGHTWHITECORNERBRACKET,
  FULLWIDTHLESSTHAN,
  FULLWIDTHGREATERTHAN,
  AMPERSAND,
  APOSTROPHE,
  ASTERISK,
  AT,
  BACKSLASH,
  BACKTICK,
  CARET,
  COLON,
  COMMA,
  DOLLAR,
  DOT,
  EQUALS,
  EXCLAMATION,
  HYPHEN,
  PERCENT,
  PIPE,
  PLUS,
  POUND,
  QUERY,
  QUOTE,
  SEMI,
  SLASH,
  TILDE,
  UNDERSCORE,
  EMOJI: EMOJI$1,
  SYM
});

// Character-class tests used by the scanner.
// Note that the two Unicode property ones expand into a really big one with Babel
const ASCII_LETTER = /[a-z]/;
const LETTER = /\p{L}/u; // Any Unicode character with letter data type
const EMOJI = /\p{Emoji}/u; // Any Unicode emoji character
const EMOJI_VARIATION$1 = /\ufe0f/;
const DIGIT = /\d/;
const SPACE = /\s/;

// Frozen, prototype-less namespace of the regular expressions above.
var regexp = /*#__PURE__*/Object.freeze({
  __proto__: null,
  ASCII_LETTER,
  LETTER,
  EMOJI,
  EMOJI_VARIATION: EMOJI_VARIATION$1,
  DIGIT,
  SPACE
});
/**
  The scanner provides an interface that takes a string of text as input, and
  outputs an array of tokens instances that can be used for easy URL parsing.
*/

const NL = '\n'; // New line character
const EMOJI_VARIATION = '\ufe0f'; // Variation selector, follows heart and others
const EMOJI_JOINER = '\u200d'; // zero-width joiner

// Decoded TLD lists. They never change, so they are computed once on the
// first init call and reused afterwards.
let tlds = null;
let utlds = null;

/**
 * Scanner output token:
 * - `t` is the token name (e.g., 'NUM', 'EMOJI', 'TLD')
 * - `v` is the value of the token (e.g., '123', '❤️', 'com')
 * - `s` is the start index of the token in the original string
 * - `e` is the end index of the token in the original string
 * @typedef {{t: string, v: string, s: number, e: number}} Token
 */

/**
 * @template T
 * @typedef {{ [collection: string]: T[] }} Collections
 */

/**
 * Build the scanner's character-based state machine and return its start
 * state together with the token names and the token groups that were
 * registered while building it.
 *
 * Side effects: replaces `State.groups` with a fresh collections object and,
 * on first use, decodes and caches the TLD lists.
 *
 * @param {[string, boolean][]} [customSchemes] List of custom schemes, where
 * each item is a length-2 tuple with the first element set to the string
 * scheme, and the second element set to `true` if the `://` after the scheme
 * is optional. The array is not modified.
 * @returns {{ start: State, tokens: object }} `start` is the start state;
 * `tokens` holds every token name from `tk` plus a `groups` property
 */
function init$2(customSchemes) {
  if (customSchemes === void 0) {
    customSchemes = [];
  }

  // Frequently used states (name argument removed during minification)
  /** @type Collections */
  const groups = {}; // of tokens
  State.groups = groups;
  /** @type State */
  const Start = new State();

  if (tlds == null) {
    tlds = decodeTlds(encodedTlds);
  }
  if (utlds == null) {
    utlds = decodeTlds(encodedUtlds);
  }

  // States for special URL symbols that accept immediately after start.
  //
  // The fullwidth forms are written as escapes on purpose: they are distinct
  // code points from their ASCII look-alikes. Keying them on the ASCII
  // characters would re-register '(' ')' '<' '>' and silently replace the
  // OPENPAREN/CLOSEPAREN/OPENANGLEBRACKET/CLOSEANGLEBRACKET transitions.
  const symbolTokens = [
    ["'", APOSTROPHE],
    ['{', OPENBRACE],
    ['}', CLOSEBRACE],
    ['[', OPENBRACKET],
    [']', CLOSEBRACKET],
    ['(', OPENPAREN],
    [')', CLOSEPAREN],
    ['<', OPENANGLEBRACKET],
    ['>', CLOSEANGLEBRACKET],
    ['\uff08', FULLWIDTHLEFTPAREN],
    ['\uff09', FULLWIDTHRIGHTPAREN],
    ['「', LEFTCORNERBRACKET],
    ['」', RIGHTCORNERBRACKET],
    ['『', LEFTWHITECORNERBRACKET],
    ['』', RIGHTWHITECORNERBRACKET],
    ['\uff1c', FULLWIDTHLESSTHAN],
    ['\uff1e', FULLWIDTHGREATERTHAN],
    ['&', AMPERSAND],
    ['*', ASTERISK],
    ['@', AT],
    ['`', BACKTICK],
    ['^', CARET],
    [':', COLON],
    [',', COMMA],
    ['$', DOLLAR],
    ['.', DOT],
    ['=', EQUALS],
    ['!', EXCLAMATION],
    ['-', HYPHEN],
    ['%', PERCENT],
    ['|', PIPE],
    ['+', PLUS],
    ['#', POUND],
    ['?', QUERY],
    ['"', QUOTE],
    ['/', SLASH],
    [';', SEMI],
    ['~', TILDE],
    ['_', UNDERSCORE],
    ['\\', BACKSLASH]
  ];
  for (let i = 0; i < symbolTokens.length; i++) {
    tt(Start, symbolTokens[i][0], symbolTokens[i][1]);
  }

  // Digits
  const Num = tr(Start, DIGIT, NUM, {
    [numeric]: true
  });
  tr(Num, DIGIT, Num);

  // State which emits a word token
  const Word = tr(Start, ASCII_LETTER, WORD, {
    [ascii]: true
  });
  tr(Word, ASCII_LETTER, Word);

  // Same as previous, but specific to non-fsm.ascii alphabet words
  const UWord = tr(Start, LETTER, UWORD, {
    [alpha]: true
  });
  tr(UWord, ASCII_LETTER); // Non-accepting
  tr(UWord, LETTER, UWord);

  // Whitespace jumps
  // Tokens of only non-newline whitespace are arbitrarily long
  // If any whitespace except newline, more whitespace!
  const Ws = tr(Start, SPACE, WS, {
    [whitespace]: true
  });
  tt(Start, NL, NL$1, {
    [whitespace]: true
  });
  tt(Ws, NL); // non-accepting state to avoid mixing whitespaces
  tr(Ws, SPACE, Ws);

  // Emoji tokens. They are not grouped by the scanner except in cases where a
  // zero-width joiner is present
  const Emoji = tr(Start, EMOJI, EMOJI$1, {
    [emoji]: true
  });
  tr(Emoji, EMOJI, Emoji);
  tt(Emoji, EMOJI_VARIATION, Emoji);
  // tt(Start, EMOJI_VARIATION, Emoji); // This one is sketchy

  const EmojiJoiner = tt(Emoji, EMOJI_JOINER);
  tr(EmojiJoiner, EMOJI, Emoji);
  // tt(EmojiJoiner, EMOJI_VARIATION, Emoji); // also sketchy

  // Generates states for top-level domains
  // Note that this is most accurate when tlds are in alphabetical order
  const wordjr = [[ASCII_LETTER, Word]];
  const uwordjr = [[ASCII_LETTER, null], [LETTER, UWord]];
  for (let i = 0; i < tlds.length; i++) {
    fastts(Start, tlds[i], TLD, WORD, wordjr);
  }
  for (let i = 0; i < utlds.length; i++) {
    fastts(Start, utlds[i], UTLD, UWORD, uwordjr);
  }
  addToGroups(TLD, {
    tld: true,
    ascii: true
  }, groups);
  addToGroups(UTLD, {
    utld: true,
    alpha: true
  }, groups);

  // Collect the states generated by different protocols. NOTE: If any new TLDs
  // get added that are also protocols, set the token to be the same as the
  // protocol to ensure parsing works as expected.
  fastts(Start, 'file', SCHEME, WORD, wordjr);
  fastts(Start, 'mailto', SCHEME, WORD, wordjr);
  fastts(Start, 'http', SLASH_SCHEME, WORD, wordjr);
  fastts(Start, 'https', SLASH_SCHEME, WORD, wordjr);
  fastts(Start, 'ftp', SLASH_SCHEME, WORD, wordjr);
  fastts(Start, 'ftps', SLASH_SCHEME, WORD, wordjr);
  addToGroups(SCHEME, {
    scheme: true,
    ascii: true
  }, groups);
  addToGroups(SLASH_SCHEME, {
    slashscheme: true,
    ascii: true
  }, groups);

  // Register custom schemes. Assumes each scheme is asciinumeric with hyphens.
  // Sort a copy so the caller's array is left untouched, and report equal
  // names as equal so the comparator is consistent.
  const orderedSchemes = customSchemes.slice().sort((a, b) => {
    if (a[0] === b[0]) {
      return 0;
    }
    return a[0] > b[0] ? 1 : -1;
  });
  for (let i = 0; i < orderedSchemes.length; i++) {
    const sch = orderedSchemes[i][0];
    const optionalSlashSlash = orderedSchemes[i][1];
    const flags = optionalSlashSlash ? {
      [scheme]: true
    } : {
      [slashscheme]: true
    };
    if (sch.indexOf('-') >= 0) {
      flags[domain] = true;
    } else if (!ASCII_LETTER.test(sch)) {
      flags[numeric] = true; // numbers only
    } else if (DIGIT.test(sch)) {
      flags[asciinumeric] = true;
    } else {
      flags[ascii] = true;
    }
    ts(Start, sch, sch, flags);
  }

  // Localhost token
  ts(Start, 'localhost', LOCALHOST, {
    ascii: true
  });

  // Set default transition for start state (some symbol)
  Start.jd = new State(SYM);
  return {
    start: Start,
    tokens: assign({
      groups
    }, tk)
  };
}
/**
  Given a string, returns an array of TOKEN instances representing the
  composition of that string.

  Each token is the longest prefix of the remaining input that ends in an
  accepting state. Anything consumed past that point is handed back to the
  next token.

  @method run
  @param {State} start scanner starting state
  @param {string} str input string to scan
  @return {Token[]} list of tokens, each with a type and value
*/
function run$1(start, str) {
  // The state machine is case-insensitive, so matching happens on a lowercased
  // copy while token values are sliced from the untouched input. Only A-Z is
  // lowercased: lowercasing the entire string causes the length and character
  // position to vary in some non-English strings with V8-based runtimes.
  const chars = stringToArray(str.replace(/[A-Z]/g, (upper) => upper.toLowerCase()));
  const total = chars.length; // <= str.length if there are emojis, etc
  const tokens = [];

  let offset = 0; // position in the string itself (UTF-16 units)
  let index = 0; // position in the array representation of the string

  while (index < total) {
    let current = start;
    let lastAccepting = null;
    let consumed = 0; // string width consumed for this token
    let overshoot = -1; // string width consumed since the last accepting state
    let overshootChars = -1; // same, counted in array items

    for (; index < total; index++) {
      const ch = chars[index];
      const following = current.go(ch);
      if (!following) {
        break;
      }
      current = following;

      // Keep track of the latest accepting state
      if (current.accepts()) {
        overshoot = 0;
        overshootChars = 0;
        lastAccepting = current;
      } else if (overshoot >= 0) {
        overshoot += ch.length;
        overshootChars += 1;
      }

      consumed += ch.length;
      offset += ch.length;
    }

    // Roll back to the latest accepting state
    offset -= overshoot;
    index -= overshootChars;
    consumed -= overshoot;

    // No more jumps, just make a new token from the last accepting one
    const begin = offset - consumed;
    tokens.push({
      t: lastAccepting.t, // token type/name
      v: str.slice(begin, offset), // string value
      s: begin, // start index
      e: offset // end index (excluding)
    });
  }

  return tokens;
}
/**
 * Convert a String to an Array of characters, taking into account that some
 * characters like emojis take up two string indexes.
 *
 * A high surrogate immediately followed by a low surrogate becomes one
 * two-unit item; every other UTF-16 unit (including unpaired surrogates)
 * becomes its own item. This is exactly how the built-in string iterator
 * splits a string, so it is used here instead of hand-written surrogate
 * arithmetic.
 *
 * @function stringToArray
 * @param {string} str
 * @returns {string[]}
 */
function stringToArray(str) {
  return Array.from(str);
}

/**
 * Fast version of ts function for when transition defaults are well known.
 *
 * Walks `input` from `state`, creating any missing intermediate state as a
 * state emitting `defaultt`, and makes the state reached by the last
 * character emit `t`. Every state created here gets its own copy of `jr` as
 * its regex transitions.
 *
 * If the final state already exists it is reused (token and regex
 * transitions are updated, children are kept), so registering a word that is
 * a prefix of an earlier one no longer discards the longer word. Empty input
 * is a no-op that returns `state`, matching `ts`.
 *
 * NOTE(review): reusing the final state assumes states reachable through `j`
 * are not shared with other paths; that holds for the states this function
 * creates itself — confirm for other callers.
 *
 * @param {State} state state to start from
 * @param {string} input characters to take transitions on
 * @param {string} t token emitted after the full input
 * @param {string} defaultt token emitted by newly created intermediate states
 * @param {[RegExp, State][]} jr regex transitions copied onto each state
 * @returns {State} the state reached after the full input
 */
function fastts(state, input, t, defaultt, jr) {
  const len = input.length;
  if (!len) {
    return state;
  }

  for (let i = 0; i < len - 1; i++) {
    const char = input[i];
    let next = state.j[char];
    if (!next) {
      next = new State(defaultt);
      next.jr = jr.slice();
      state.j[char] = next;
    }
    state = next;
  }

  const lastChar = input[len - 1];
  let end = state.j[lastChar];
  if (end) {
    // Keep the existing subtree; only what this word defines is refreshed
    end.t = t;
    end.jr = jr.slice();
  } else {
    end = new State(t);
    end.jr = jr.slice();
    state.j[lastChar] = end;
  }
  return end;
}
/**
 * Converts a string of Top-Level Domain names encoded in update-tlds.js back
 * into a list of strings.
 *
 * The encoding is a flattened trie: a run of non-digit characters extends the
 * current prefix, and the decimal number that follows means "the prefix is now
 * a complete word; then drop that many trailing characters before reading
 * on". Characters after the last number never form a word.
 *
 * @param {string} encoded encoded TLDs string
 * @returns {string[]} original TLDs list
 */
function decodeTlds(encoded) {
  const decoded = [];
  // One step = optional characters to append + the pop count that follows.
  // Created per call so the sticky `lastIndex` state is never shared.
  const step = /([^0-9]*)([0-9]+)/g;
  let prefix = '';

  for (let match = step.exec(encoded); match !== null; match = step.exec(encoded)) {
    prefix += match[1]; // drop down into the trie
    decoded.push(prefix); // whatever preceded the pop digits must be a word

    // Go back up the trie; popping more than is available just empties it
    const popCount = parseInt(match[2], 10);
    prefix = prefix.slice(0, Math.max(0, prefix.length - popCount));
  }

  return decoded;
}

/**
 * An object where each key is a valid DOM Event Name such as `click` or `focus`
 * and each value is an event handler function.
 *
 * https://developer.mozilla.org/en-US/docs/Web/API/Element#events
 * @typedef {?{ [event: string]: Function }} EventListeners
 */

/**
 * All formatted properties required to render a link, including `tagName`,
 * `attributes`, `content` and `eventListeners`.
 * @typedef {{ tagName: any, attributes: {[attr: string]: any}, content: string,
 * eventListeners: EventListeners }} IntermediateRepresentation
 */

/**
 * Specify either an object described by the template type `O` or a function.
 *
 * The function takes a string value (usually the link's href attribute), the
 * link type (`'url'`, `'hashtag`', etc.) and an internal token representation
 * of the link. It should return an object of the template type `O`
 * @template O
 * @typedef {O | ((value: string, type: string, token: MultiToken) => O)} OptObj
 */
* @template F * @typedef {F | { [type: string]: F}} OptFn */ /** * Specify either a value with template type `V`, a function that returns `V` or * an object where each value resolves to `V`. * * The function takes a string value (usually the link's href attribute), the * link type (`'url'`, `'hashtag`', etc.) and an internal token representation * of the link. It should return an object of the template type `V` * * For the object, each key should be a link type (`'url'`, `'hashtag`', etc.). * Each value should either have type `V` or a function that returns V. This * function similarly takes a string value and a token. * * Example valid types for `Opt`: * * ```js * 'hello' * (value, type, token) => 'world' * { url: 'hello', email: (value, token) => 'world'} * ``` * @template V * @typedef {V | ((value: string, type: string, token: MultiToken) => V) | { [type: string]: V | ((value: string, token: MultiToken) => V) }} Opt */ /** * See available options: https://linkify.js.org/docs/options.html * @typedef {{ * defaultProtocol?: string, * events?: OptObj, * format?: Opt, * formatHref?: Opt, * nl2br?: boolean, * tagName?: Opt, * target?: Opt, * rel?: Opt, * validate?: Opt, * truncate?: Opt, * className?: Opt, * attributes?: OptObj<({ [attr: string]: any })>, * ignoreTags?: string[], * render?: OptFn<((ir: IntermediateRepresentation) => any)> * }} Opts */ /** * @type Required */ const defaults = { defaultProtocol: 'http', events: null, format: noop, formatHref: noop, nl2br: false, tagName: 'a', target: null, rel: null, validate: true, truncate: Infinity, className: null, attributes: null, ignoreTags: [], render: null }; /** * Utility class for linkify interfaces to apply specified * {@link Opts formatting and rendering options}. * * @param {Opts | Options} [opts] Option value overrides. 
/**
 * Utility class for linkify interfaces to apply specified
 * {@link Opts formatting and rendering options}.
 *
 * @param {Opts | Options} [opts] Option value overrides.
 * @param {(ir: IntermediateRepresentation) => any} [defaultRender] (For
 *   internal use) default render function that determines how to generate an
 *   HTML element based on a link token's derived tagName, attributes and HTML.
 *   Similar to render option
 */
function Options(opts, defaultRender) {
  // Start from a copy of the defaults, then layer the overrides on top
  let merged = assign({}, defaults);
  if (opts) {
    const overrides = opts instanceof Options ? opts.o : opts;
    merged = assign(merged, overrides);
  }

  // Ensure all ignored tags are uppercase
  const requestedTags = merged.ignoreTags;
  const upperTags = [];
  for (let idx = 0; idx < requestedTags.length; idx += 1) {
    upperTags.push(requestedTags[idx].toUpperCase());
  }

  /** @protected */
  this.o = merged;
  if (defaultRender) {
    // Only shadow the prototype's pass-through renderer when one is supplied
    this.defaultRender = defaultRender;
  }
  this.ignoreTags = upperTags;
}

Options.prototype = {
  o: defaults,

  /**
   * @type string[]
   */
  ignoreTags: [],

  /**
   * Fallback renderer: hands the intermediate representation back untouched.
   * @param {IntermediateRepresentation} ir
   * @returns {any}
   */
  defaultRender(ir) {
    return ir;
  },

  /**
   * Returns true or false based on whether a token should be displayed as a
   * link based on the user options.
   * @param {MultiToken} token
   * @returns {boolean}
   */
  check(token) {
    return this.get('validate', token.toString(), token);
  },

  // Private methods

  /**
   * Resolve an option's value based on the value of the option and the given
   * params. If operator and token are specified and the target option is
   * callable, automatically calls the function with the given argument.
   * @template {keyof Opts} K
   * @param {K} key Name of option to use
   * @param {string} [operator] will be passed to the target option if it's a
   *   function. If not specified, RAW function value gets returned
   * @param {MultiToken} [token] The token from linkify.tokenize
   * @returns {Opts[K] | any}
   */
  get(key, operator, token) {
    const shouldInvoke = operator != null;
    const configured = this.o[key];

    // Falsy values (null, false, 0, '') are returned as they are
    if (!configured) {
      return configured;
    }

    if (typeof configured === 'object') {
      // Per-link-type map: use the entry for this token's type, or the default
      const perType = token.t in configured ? configured[token.t] : defaults[key];
      if (typeof perType === 'function' && shouldInvoke) {
        return perType(operator, token);
      }
      return perType;
    }

    if (typeof configured === 'function' && shouldInvoke) {
      return configured(operator, token.t, token);
    }
    return configured;
  },

  /**
   * Resolve an option whose value is itself an object (for example
   * `attributes` or `events`), calling it first when it is a function and an
   * operator is given.
   * @template {keyof Opts} L
   * @param {L} key Name of options object to use
   * @param {string} [operator]
   * @param {MultiToken} [token]
   * @returns {Opts[L] | any}
   */
  getObj(key, operator, token) {
    const configured = this.o[key];
    if (typeof configured === 'function' && operator != null) {
      return configured(operator, token.t, token);
    }
    return configured;
  },

  /**
   * Convert the given token to a rendered element that may be added to the
   * calling-interface's DOM
   * @param {MultiToken} token Token to render to an HTML element
   * @returns {any} Render result; e.g., HTML string, DOM element, React
   *   Component, etc.
   */
  render(token) {
    const ir = token.render(this); // intermediate representation
    // Passing a null operator makes `get` return the raw render function
    const renderFn = this.get('render', null, token) || this.defaultRender;
    return renderFn(ir, token.t, token);
  }
};

/**
 * Identity function, used as the default formatter.
 */
function noop(val) {
  return val;
}

const options = /*#__PURE__*/Object.freeze({
  __proto__: null,
  defaults,
  Options,
  assign
});

/******************************************************************************
	Multi-Tokens
	Tokens composed of arrays of TextTokens
******************************************************************************/

/**
 * @param {string} value
 * @param {Token[]} tokens
 */
function MultiToken(value, tokens) {
  this.t = 'token';
  this.v = value;
  this.tk = tokens;
}

/**
 * Abstract class used for manufacturing tokens of text tokens. That is, rather
 * than the value for a token being a small string of text, its value is an
 * array of text tokens.
 *
 * Used for grouping together URLs, emails, hashtags, and other potential
 * creations.
 * @class MultiToken
 * @property {string} t
 * @property {string} v
 * @property {Token[]} tk
 * @abstract
 */
MultiToken.prototype = {
  isLink: false,

  /**
   * Return the string this token represents.
   * @return {string}
   */
  toString() {
    return this.v;
  },

  /**
   * What should the value for this token be in the `href` HTML attribute?
   * Returns the `.toString` value by default.
   * @param {string} [scheme]
   * @return {string}
   */
  toHref(scheme) {
    return this.toString();
  },

  /**
   * Text to display for this token after the `format` and `truncate` options
   * have been applied.
   * @param {Options} options Formatting options
   * @returns {string}
   */
  toFormattedString(options) {
    const raw = this.toString();
    const limit = options.get('truncate', raw, this);
    const text = options.get('format', raw, this);
    if (limit && text.length > limit) {
      return text.substring(0, limit) + '…';
    }
    return text;
  },

  /**
   * Href for this token after the `formatHref` option has been applied.
   * @param {Options} options
   * @returns {string}
   */
  toFormattedHref(options) {
    const scheme = options.get('defaultProtocol');
    const href = this.toHref(scheme);
    return options.get('formatHref', href, this);
  },

  /**
   * The start index of this token in the original input string
   * @returns {number}
   */
  startIndex() {
    return this.tk[0].s;
  },

  /**
   * The end index of this token in the original input string (up to this
   * index but not including it)
   * @returns {number}
   */
  endIndex() {
    return this.tk[this.tk.length - 1].e;
  },

  /**
   * Returns an object of relevant values for this token, which includes keys
   * - type - Kind of token ('url', 'email', etc.)
   * - value - Original text
   * - href - The value that should be added to the anchor tag's href attribute
   * @method toObject
   * @param {string} [protocol] `'http'` by default
   */
  toObject(protocol) {
    const scheme = protocol === undefined ? defaults.defaultProtocol : protocol;
    return {
      type: this.t,
      value: this.toString(),
      isLink: this.isLink,
      href: this.toHref(scheme),
      start: this.startIndex(),
      end: this.endIndex()
    };
  },

  /**
   * Same shape as `toObject`, but with `value` and `href` run through the
   * formatting options.
   * @param {Options} options Formatting option
   */
  toFormattedObject(options) {
    return {
      type: this.t,
      value: this.toFormattedString(options),
      isLink: this.isLink,
      href: this.toFormattedHref(options),
      start: this.startIndex(),
      end: this.endIndex()
    };
  },

  /**
   * Whether this token should be rendered as a link according to the given options
   * @param {Options} options
   * @returns {boolean}
   */
  validate(options) {
    return options.get('validate', this.toString(), this);
  },

  /**
   * Return an object that represents how this link should be rendered.
   * @param {Options} options Formatting options
   */
  render(options) {
    // Options are resolved in a fixed order because each one may invoke a
    // user-supplied callback.
    const href = this.toHref(options.get('defaultProtocol'));
    const formattedHref = options.get('formatHref', href, this);
    const tagName = options.get('tagName', href, this);
    const content = this.toFormattedString(options);
    const className = options.get('className', href, this);
    const target = options.get('target', href, this);
    const rel = options.get('rel', href, this);
    const extraAttributes = options.getObj('attributes', href, this);
    const eventListeners = options.getObj('events', href, this);

    const attributes = { href: formattedHref };
    if (className) {
      attributes.class = className;
    }
    if (target) {
      attributes.target = target;
    }
    if (rel) {
      attributes.rel = rel;
    }
    if (extraAttributes) {
      // User attributes win over the derived ones
      assign(attributes, extraAttributes);
    }

    return { tagName, attributes, content, eventListeners };
  }
};
/**
 * Create a new token that can be emitted by the parser state machine
 * @param {string} type readable type of the token
 * @param {object} props properties to assign or override, including isLink =
 *   true or false
 * @returns {new (value: string, tokens: Token[]) => MultiToken} new token class
 */
function createTokenClass(type, props) {
  class Token extends MultiToken {
    constructor(value, tokens) {
      super(value, tokens);
      // Instances report the concrete type instead of the generic 'token'
      this.t = type;
    }
  }

  // The type is also readable from the class itself
  Token.t = type;

  // Copy overrides (isLink, toHref, ...) onto the prototype; does nothing
  // when `props` is omitted
  for (const name in props) {
    Token.prototype[name] = props[name];
  }

  return Token;
}

/**
 * Represents a list of tokens making up a valid email address
 */
const Email = createTokenClass('email', {
  isLink: true,
  toHref() {
    return `mailto:${this.toString()}`;
  }
});

/**
 * Represents some plain text
 */
const Text = createTokenClass('text');

/**
 * Multi-linebreak token - represents a line break
 * @class Nl
 */
const Nl = createTokenClass('nl');

/**
 * Represents a list of text tokens making up a valid URL
 * @class Url
 */
const Url = createTokenClass('url', {
  isLink: true,

  /**
   * Returns the URL text unchanged when it already has a protocol, otherwise
   * prefixes it with `scheme://`. Note that this will not escape unsafe HTML
   * characters in the URL.
   * @param {string} [scheme] default scheme (e.g., 'https')
   * @return {string} the full href
   */
  toHref(scheme) {
    const fallbackScheme = scheme === undefined ? defaults.defaultProtocol : scheme;
    if (this.hasProtocol()) {
      return this.v;
    }
    return `${fallbackScheme}://${this.v}`;
  },

  /**
   * Check whether this URL token has a protocol
   * @return {boolean}
   */
  hasProtocol() {
    const parts = this.tk;
    if (parts.length < 2) {
      return false;
    }
    // `localhost:` is a host followed by a port, not a scheme
    return parts[0].t !== LOCALHOST && parts[1].t === COLON;
  }
});

const multi = /*#__PURE__*/Object.freeze({
  __proto__: null,
  MultiToken,
  Base: MultiToken,
  createTokenClass,
  Email,
  Text,
  Nl,
  Url
});
/**
	Not exactly parser, more like the second-stage scanner (although we can
	theoretically hotswap the code here with a real parser in the future... but
	for a little URL-finding utility abstract syntax trees may be a little
	overkill).

	URL format: http://en.wikipedia.org/wiki/URI_scheme
	Email format: http://en.wikipedia.org/wiki/EmailAddress (links to RFC in
	reference)

	@module linkify
	@submodule parser
	@main run
*/

const makeState = (arg) => new State(arg);

/**
 * Generate the parser multi token-based state machine
 * @param {{ groups: Collections }} tokens
 */
function init$1({ groups }) {
  // Types of characters the URL can definitely end in
  const qsAccepting = groups.domain.concat([
    AMPERSAND, ASTERISK, AT, BACKSLASH, BACKTICK, CARET, DOLLAR, EQUALS,
    HYPHEN, NUM, PERCENT, PIPE, PLUS, POUND, SLASH, SYM, TILDE, UNDERSCORE
  ]);

  // Types of tokens that can follow a URL and be part of the query string
  // but cannot be the very last characters
  // Characters that cannot appear in the URL at all should be excluded
  const qsNonAccepting = [
    APOSTROPHE, COLON, COMMA, DOT, EXCLAMATION, QUERY, QUOTE, SEMI,
    OPENANGLEBRACKET, CLOSEANGLEBRACKET, OPENBRACE, CLOSEBRACE, CLOSEBRACKET,
    OPENBRACKET, OPENPAREN, CLOSEPAREN, FULLWIDTHLEFTPAREN,
    FULLWIDTHRIGHTPAREN, LEFTCORNERBRACKET, RIGHTCORNERBRACKET,
    LEFTWHITECORNERBRACKET, RIGHTWHITECORNERBRACKET, FULLWIDTHLESSTHAN,
    FULLWIDTHGREATERTHAN
  ];

  // For addresses without the mailto prefix
  // Tokens allowed in the localpart of the email
  const localpartAccepting = [
    AMPERSAND, APOSTROPHE, ASTERISK, BACKSLASH, BACKTICK, CARET, DOLLAR,
    EQUALS, HYPHEN, OPENBRACE, CLOSEBRACE, PERCENT, PIPE, PLUS, POUND, QUERY,
    SLASH, SYM, TILDE, UNDERSCORE
  ];

  // The universal starting state.
  /**
   * @type State
   */
  const Start = makeState();

  // Local part of the email address
  const Localpart = tt(Start, TILDE);
  ta(Localpart, localpartAccepting, Localpart);
  ta(Localpart, groups.domain, Localpart);

  const Domain = makeState();
  const Scheme = makeState();
  const SlashScheme = makeState();
  ta(Start, groups.domain, Domain); // parsed string ends with a potential domain name (A)
  ta(Start, groups.scheme, Scheme); // e.g., 'mailto'
  ta(Start, groups.slashscheme, SlashScheme); // e.g., 'http'
  ta(Domain, localpartAccepting, Localpart);
  ta(Domain, groups.domain, Domain);

  // Local part of the email address plus @
  const LocalpartAt = tt(Domain, AT);
  tt(Localpart, AT, LocalpartAt); // close to an email address now

  // Local part of an email address can be e.g. 'http' or 'mailto'
  tt(Scheme, AT, LocalpartAt);
  tt(SlashScheme, AT, LocalpartAt);

  // Local part of the email address plus '.' (localpart cannot end in .)
  const LocalpartDot = tt(Localpart, DOT);
  ta(LocalpartDot, localpartAccepting, Localpart);
  ta(LocalpartDot, groups.domain, Localpart);

  const EmailDomain = makeState();
  // parsed string starts with local email info + @ with a potential domain name
  ta(LocalpartAt, groups.domain, EmailDomain);
  ta(EmailDomain, groups.domain, EmailDomain);

  // domain followed by DOT
  const EmailDomainDot = tt(EmailDomain, DOT);
  ta(EmailDomainDot, groups.domain, EmailDomain);

  // Possible email address (could have more tlds)
  const Email$1 = makeState(Email);
  ta(EmailDomainDot, groups.tld, Email$1);
  ta(EmailDomainDot, groups.utld, Email$1);
  tt(LocalpartAt, LOCALHOST, Email$1);

  // Hyphen can jump back to a domain name
  const EmailDomainHyphen = tt(EmailDomain, HYPHEN);
  ta(EmailDomainHyphen, groups.domain, EmailDomain);
  ta(Email$1, groups.domain, EmailDomain);
  tt(Email$1, DOT, EmailDomainDot);
  tt(Email$1, HYPHEN, EmailDomainHyphen);

  // Final possible email states
  // Email followed by colon (potential port number here)
  const EmailColon = tt(Email$1, COLON);
  // Email followed by colon and port number. The target here is the Email
  // token class itself, not the `Email$1` state.
  ta(EmailColon, groups.numeric, Email);

  // Account for dots and hyphens. Hyphens are usually parts of domain names
  // (but not TLDs)
  const DomainHyphen = tt(Domain, HYPHEN); // domain followed by hyphen
  const DomainDot = tt(Domain, DOT); // domain followed by DOT
  ta(DomainHyphen, groups.domain, Domain);
  ta(DomainDot, localpartAccepting, Localpart);
  ta(DomainDot, groups.domain, Domain);

  // Simplest possible URL with no query string
  const DomainDotTld = makeState(Url);
  ta(DomainDot, groups.tld, DomainDotTld);
  ta(DomainDot, groups.utld, DomainDotTld);
  ta(DomainDotTld, groups.domain, Domain);
  ta(DomainDotTld, localpartAccepting, Localpart);
  tt(DomainDotTld, DOT, DomainDot);
  tt(DomainDotTld, HYPHEN, DomainHyphen);
  tt(DomainDotTld, AT, LocalpartAt);

  // URL followed by colon (potential port number here)
  const DomainDotTldColon = tt(DomainDotTld, COLON);
  // TLD followed by a port number
  const DomainDotTldColonPort = makeState(Url);
  ta(DomainDotTldColon, groups.numeric, DomainDotTldColonPort);

  // Long URL with optional port and maybe query string
  const Url$1 = makeState(Url);

  // URL followed by some symbols (will not be part of the final URL)
  const UrlNonaccept = makeState();

  // Query strings
  ta(Url$1, qsAccepting, Url$1);
  ta(Url$1, qsNonAccepting, UrlNonaccept);
  ta(UrlNonaccept, qsAccepting, Url$1);
  ta(UrlNonaccept, qsNonAccepting, UrlNonaccept);

  // Become real URLs after `SLASH` or `COLON NUM SLASH`
  // Here works with or without scheme:// prefix
  tt(DomainDotTld, SLASH, Url$1);
  tt(DomainDotTldColonPort, SLASH, Url$1);

  // Note that domains that begin with schemes are treated slightly differently
  const SchemeColon = tt(Scheme, COLON); // e.g., 'mailto:'
  const SlashSchemeColon = tt(SlashScheme, COLON); // e.g., 'http:'
  const SlashSchemeColonSlash = tt(SlashSchemeColon, SLASH); // e.g., 'http:/'
  const UriPrefix = tt(SlashSchemeColonSlash, SLASH); // e.g., 'http://'

  // Scheme states can transition to domain states
  ta(Scheme, groups.domain, Domain);
  tt(Scheme, DOT, DomainDot);
  tt(Scheme, HYPHEN, DomainHyphen);
  ta(SlashScheme, groups.domain, Domain);
  tt(SlashScheme, DOT, DomainDot);
  tt(SlashScheme, HYPHEN, DomainHyphen);

  // Force URL with scheme prefix followed by anything sane
  ta(SchemeColon, groups.domain, Url$1);
  tt(SchemeColon, SLASH, Url$1);
  ta(UriPrefix, groups.domain, Url$1);
  ta(UriPrefix, qsAccepting, Url$1);
  tt(UriPrefix, SLASH, Url$1);

  const bracketPairs = [
    [OPENBRACE, CLOSEBRACE], // curly braces
    [OPENBRACKET, CLOSEBRACKET], // square brackets
    [OPENPAREN, CLOSEPAREN], // parentheses
    [OPENANGLEBRACKET, CLOSEANGLEBRACKET], // angle brackets
    [FULLWIDTHLEFTPAREN, FULLWIDTHRIGHTPAREN], // full-width parentheses
    [LEFTCORNERBRACKET, RIGHTCORNERBRACKET], // corner brackets
    [LEFTWHITECORNERBRACKET, RIGHTWHITECORNERBRACKET], // white corner brackets
    [FULLWIDTHLESSTHAN, FULLWIDTHGREATERTHAN] // full-width less/greater-than
  ];

  for (const [OPEN, CLOSE] of bracketPairs) {
    // URL followed by open bracket
    const UrlOpen = tt(Url$1, OPEN);

    // Continue not accepting for open brackets
    tt(UrlNonaccept, OPEN, UrlOpen);

    // Closing bracket component. This character WILL be included in the URL
    tt(UrlOpen, CLOSE, Url$1);

    // URL that begins with an opening bracket, followed by symbols.
    // Note that the final state can still be `UrlOpen` (if the URL has a
    // single opening bracket for some reason).
    const UrlOpenQ = makeState(Url);
    ta(UrlOpen, qsAccepting, UrlOpenQ);

    // UrlOpen followed by some symbols it cannot end in
    const UrlOpenSyms = makeState();
    // NOTE(review): no target state is passed here, so `UrlOpenSyms` is not
    // reachable directly from `UrlOpen`. Possibly meant to be
    // `ta(UrlOpen, qsNonAccepting, UrlOpenSyms)` - confirm against `ta`.
    ta(UrlOpen, qsNonAccepting);

    // URL that begins with an opening bracket, followed by some symbols
    ta(UrlOpenQ, qsAccepting, UrlOpenQ);
    ta(UrlOpenQ, qsNonAccepting, UrlOpenSyms);
    ta(UrlOpenSyms, qsAccepting, UrlOpenQ);
    ta(UrlOpenSyms, qsNonAccepting, UrlOpenSyms);

    // Close brace/bracket to become regular URL
    tt(UrlOpenQ, CLOSE, Url$1);
    tt(UrlOpenSyms, CLOSE, Url$1);
  }

  tt(Start, LOCALHOST, DomainDotTld); // localhost is a valid URL state
  tt(Start, NL$1, Nl); // single new line

  return { start: Start, tokens: tk };
}

/**
 * Run the parser state machine on a list of scanned string-based tokens to
 * create a list of multi tokens, each of which represents a URL, email address,
 * plain text, etc.
 *
 * @param {State} start parser start state
 * @param {string} input the original input used to generate the given tokens
 * @param {Token[]} tokens list of scanned tokens
 * @returns {MultiToken[]}
 */
function run(start, input, tokens) {
  const total = tokens.length;
  const multis = [];
  let pendingText = [];
  let cursor = 0;

  // Emit the accumulated plain-text tokens (if any) as one Text multi token
  const flushText = () => {
    if (pendingText.length > 0) {
      multis.push(initMultiToken(Text, input, pendingText));
      pendingText = [];
    }
  };

  while (cursor < total) {
    // Tokens the start state has no transition for are just plain text
    let entered = null;
    while (cursor < total && !(entered = start.go(tokens[cursor].t))) {
      pendingText.push(tokens[cursor]);
      cursor += 1;
    }

    // Follow transitions for as long as the machine can move
    let state = start;
    let consumed = 0;
    let latestAccepting = null;
    let sinceAccepts = -1;
    while (cursor < total) {
      // The first step reuses the transition already found above
      const next = entered || state.go(tokens[cursor].t);
      if (!next) {
        break;
      }
      entered = null;
      state = next;

      // Keep track of the latest accepting state
      if (state.accepts()) {
        sinceAccepts = 0;
        latestAccepting = state;
      } else if (sinceAccepts >= 0) {
        sinceAccepts += 1;
      }
      cursor += 1;
      consumed += 1;
    }

    if (sinceAccepts < 0) {
      // No accepting state was found: rewind, treat the first token of the
      // attempt as plain text and try again from the next one
      cursor -= consumed;
      if (cursor < total) {
        pendingText.push(tokens[cursor]);
        cursor += 1;
      }
      continue;
    }

    // Accepting state! First close off the text tokens (if available)
    flushText();

    // Roll back to the latest accepting state
    cursor -= sinceAccepts;
    const firstIndex = cursor - (consumed - sinceAccepts);

    // Create a new multitoken
    const Multi = latestAccepting.t;
    multis.push(initMultiToken(Multi, input, tokens.slice(firstIndex, cursor)));
  }

  // Finally close off the text tokens (if available)
  flushText();
  return multis;
}

/**
 * Utility function for instantiating a new multitoken with all the relevant
 * fields during parsing.
 * @param {new (value: string, tokens: Token[]) => MultiToken} Multi class to instantiate
 * @param {string} input original input string
 * @param {Token[]} tokens consecutive tokens scanned from input string
 * @returns {MultiToken}
 */
function initMultiToken(Multi, input, tokens) {
  const { s: from } = tokens[0];
  const { e: to } = tokens[tokens.length - 1];
  return new Multi(input.slice(from, to), tokens);
}

// Falls back to a no-op when no console (or no console.warn) is available
const consoleWarn = typeof console !== 'undefined' && console && console.warn;
const warn = consoleWarn || (() => {});
const warnAdvice = 'until manual call of linkify.init(). Register all schemes and plugins before invoking linkify the first time.';

// Side-effect initialization state
const INIT = {
  scanner: null,
  parser: null,
  tokenQueue: [],
  pluginQueue: [],
  customSchemes: [],
  initialized: false
};

/**
 * @typedef {{
 * 	start: State,
 * 	tokens: { groups: Collections } & typeof tk
 * }} ScannerInit
 */

/**
 * @typedef {{
 * 	start: State,
 * 	tokens: typeof multi
 * }} ParserInit
 */

/**
 * @typedef {(arg: { scanner: ScannerInit }) => void} TokenPlugin
 */

/**
 * @typedef {(arg: { scanner: ScannerInit, parser: ParserInit }) => void} Plugin
 */

/**
 * De-register all plugins and reset the internal state-machine. Used for
 * testing; not required in practice.
 * @private
 */
function reset() {
  State.groups = {};
  Object.assign(INIT, {
    scanner: null,
    parser: null,
    tokenQueue: [],
    pluginQueue: [],
    customSchemes: [],
    initialized: false
  });
}
/**
 * Shared implementation of plugin registration: validates the plugin, then
 * either replaces an entry of the same name or appends a new one to the
 * given queue on `INIT`.
 * @param {'tokenQueue' | 'pluginQueue'} queueKey which `INIT` queue to use
 * @param {string} label plugin kind as it appears in messages
 * @param {string} name of plugin to register
 * @param {Function} plugin
 * @private
 */
function enqueueNamedPlugin(queueKey, label, name, plugin) {
  if (typeof plugin !== 'function') {
    throw new Error(`linkifyjs: Invalid ${label} ${plugin} (expects function)`);
  }

  const existingIndex = INIT[queueKey].findIndex((entry) => name === entry[0]);
  if (existingIndex >= 0) {
    warn(`linkifyjs: ${label} "${name}" already registered - will be overwritten`);
    INIT[queueKey][existingIndex] = [name, plugin];
    return;
  }

  INIT[queueKey].push([name, plugin]);
  if (INIT.initialized) {
    // Queued, but the already-built state machine will not pick it up
    warn(`linkifyjs: already initialized - will not register ${label} "${name}" ${warnAdvice}`);
  }
}

/**
 * Register a token plugin to allow the scanner to recognize additional token
 * types before the parser state machine is constructed from the results.
 * @param {string} name of plugin to register
 * @param {TokenPlugin} plugin function that accepts the scanner state machine
 *   and available scanner tokens and collections and extends the state machine
 *   to recognize additional tokens or groups.
 */
function registerTokenPlugin(name, plugin) {
  enqueueNamedPlugin('tokenQueue', 'token plugin', name, plugin);
}

/**
 * Register a linkify plugin
 * @param {string} name of plugin to register
 * @param {Plugin} plugin function that accepts the parser state machine and
 *   extends the parser to recognize additional link types
 */
function registerPlugin(name, plugin) {
  enqueueNamedPlugin('pluginQueue', 'plugin', name, plugin);
}

/**
 * Detect URLs with the following additional protocol. Anything with format
 * "protocol://..." will be considered a link. If `optionalSlashSlash` is set to
 * `true`, anything with format "protocol:..." will be considered a link.
 * @param {string} scheme
 * @param {boolean} [optionalSlashSlash]
 */
function registerCustomProtocol(scheme, optionalSlashSlash) {
  const slashesOptional = optionalSlashSlash === undefined ? false : optionalSlashSlash;

  if (INIT.initialized) {
    warn(`linkifyjs: already initialized - will not register custom scheme "${scheme}" ${warnAdvice}`);
  }

  const isWellFormed = /^[0-9a-z]+(-[0-9a-z]+)*$/.test(scheme);
  if (!isWellFormed) {
    throw new Error(`linkifyjs: incorrect scheme format. 1. Must only contain digits, lowercase ASCII letters or "-" 2. Cannot start or end with "-" 3. "-" cannot repeat`);
  }

  INIT.customSchemes.push([scheme, slashesOptional]);
}

/**
 * Initialize the linkify state machine. Called automatically the first time
 * linkify is called on a string, but may be called manually as well.
 */
function init() {
  // Initialize scanner state machine and plugins
  INIT.scanner = init$2(INIT.customSchemes);
  for (let t = 0; t < INIT.tokenQueue.length; t += 1) {
    INIT.tokenQueue[t][1]({ scanner: INIT.scanner });
  }

  // Initialize parser state machine and plugins
  INIT.parser = init$1(INIT.scanner.tokens);
  for (let p = 0; p < INIT.pluginQueue.length; p += 1) {
    INIT.pluginQueue[p][1]({ scanner: INIT.scanner, parser: INIT.parser });
  }

  INIT.initialized = true;
}

/**
 * Parse a string into tokens that represent linkable and non-linkable sub-components
 * @param {string} str
 * @return {MultiToken[]} tokens
 */
function tokenize(str) {
  if (!INIT.initialized) {
    init();
  }
  const parserStart = INIT.parser.start;
  const scanned = run$1(INIT.scanner.start, str);
  return run(parserStart, str, scanned);
}
Cannot be specified * if opts already provided in `type` argument */ function find(str, type, opts) { if (type === void 0) { type = null; } if (opts === void 0) { opts = null; } if (type && typeof type === 'object') { if (opts) { throw Error(`linkifyjs: Invalid link type ${type}; must be a string`); } opts = type; type = null; } const options = new Options(opts); const tokens = tokenize(str); const filtered = []; for (let i = 0; i < tokens.length; i++) { const token = tokens[i]; if (token.isLink && (!type || token.t === type) && options.check(token)) { filtered.push(token.toFormattedObject(options)); } } return filtered; } /** * Is the given string valid linkable text of some sort. Note that this does not * trim the text for you. * * Optionally pass in a second `type` param, which is the type of link to test * for. * * For example, * * linkify.test(str, 'email'); * * Returns `true` if str is a valid email. * @param {string} str string to test for links * @param {string} [type] optional specific link type to look for * @returns boolean true/false */ function test(str, type) { if (type === void 0) { type = null; } const tokens = tokenize(str); return tokens.length === 1 && tokens[0].isLink && (!type || tokens[0].t === type); } exports.MultiToken = MultiToken; exports.Options = Options; exports.State = State; exports.createTokenClass = createTokenClass; exports.find = find; exports.init = init; exports.multi = multi; exports.options = options; exports.regexp = regexp; exports.registerCustomProtocol = registerCustomProtocol; exports.registerPlugin = registerPlugin; exports.registerTokenPlugin = registerTokenPlugin; exports.reset = reset; exports.stringToArray = stringToArray; exports.test = test; exports.tokenize = tokenize;