import { wrapText, parseTokens } from "./textWrapping";
import type { FontString } from "./types";

/**
 * Behavioural tests for `wrapText` (line wrapping against a max width) and
 * `parseTokens` (splitting text into wrappable tokens).
 *
 * NOTE(review): many non-ASCII fixtures below look mis-encoded in this copy of
 * the file and several whitespace runs look collapsed; they are kept exactly
 * as found. Confirm the file is stored as UTF-8 before relying on them.
 */
describe("Test wrapText", () => {
  // font is irrelevant as jsdom does not support FontFace API
  // `measureText` width is mocked to return `text.length` by `jest-canvas-mock`
  // https://github.com/hustcc/jest-canvas-mock/blob/master/src/classes/TextMetrics.js
  const font = "10px Cascadia, Segoe UI Emoji" as FontString;

  it("should wrap the text correctly when word length is exactly equal to max width", () => {
    // Length of "Excalidraw" is 100 and exactly equal to max width
    const wrapped = wrapText("Hello Excalidraw", font, 100);

    expect(wrapped).toEqual(`Hello\nExcalidraw`);
  });

  it("should return the text as is if max width is invalid", () => {
    const input = "Hello Excalidraw";

    // Each of these widths is treated as invalid, so the input comes back untouched.
    for (const invalidWidth of [NaN, -1, Infinity]) {
      expect(wrapText(input, font, invalidWidth)).toEqual(input);
    }
  });

  it("should show the text correctly when max width reached", () => {
    const wrapped = wrapText("HelloπŸ˜€", font, 10);

    expect(wrapped).toBe("H\ne\nl\nl\no\nπŸ˜€");
  });

  it("should not wrap number when wrapping line", () => {
    const wrapped = wrapText("don't wrap this number 99,100.99", font, 300);

    expect(wrapped).toBe("don't wrap this number\n99,100.99");
  });

  it("should trim all trailing whitespaces", () => {
    const wrapped = wrapText("Hello ", font, 50);

    expect(wrapped).toBe("Hello");
  });

  it("should trim all but one trailing whitespaces", () => {
    const wrapped = wrapText("Hello ", font, 60);

    expect(wrapped).toBe("Hello ");
  });

  it("should keep preceding whitespaces and trim all trailing whitespaces", () => {
    const wrapped = wrapText(" Hello World", font, 90);

    expect(wrapped).toBe(" Hello\nWorld");
  });

  it("should keep some preceding whitespaces, trim trailing whitespaces, but kep those that fit in the trailing line", () => {
    const wrapped = wrapText(" Hello World ", font, 90);

    expect(wrapped).toBe(" Hello\nWorld ");
  });

  it("should trim keep those whitespace that fit in the trailing line", () => {
    const wrapped = wrapText("Hello Wo rl d ", font, 100);

    expect(wrapped).toBe("Hello Wo\nrl d ");
  });

  it("should support multiple (multi-codepoint) emojis", () => {
    const emojis = "πŸ˜€πŸ—ΊπŸ”₯πŸ‘©πŸ½β€πŸ¦°πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦πŸ‡¨πŸ‡Ώ";

    // Width of 1 forces a break after every token.
    const wrapped = wrapText(emojis, font, 1);

    expect(wrapped).toBe("πŸ˜€\nπŸ—Ί\nπŸ”₯\nπŸ‘©πŸ½β€πŸ¦°\nπŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦\nπŸ‡¨πŸ‡Ώ");
  });

  it("should wrap the text correctly when text contains hyphen", () => {
    const paragraph =
      "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects";

    expect(wrapText(paragraph, font, 110)).toBe(
      `Wikipedia\nis hosted\nby\nWikimedia-\nFoundation,\na non-\nprofit\norganizatio\nn that also\nhosts a\nrange-of\nother\nprojects`,
    );

    const hyphenated = "Hello thereusing-now";

    expect(wrapText(hyphenated, font, 100)).toEqual("Hello\nthereusing\n-now");
  });

  it("should support wrapping nested lists", () => {
    const list = `\tA) one tab\t\t- two tabs - 8 spaces`;

    expect(wrapText(list, font, 100)).toBe(
      `\tA) one\ntab\t\t- two\ntabs\n- 8 spaces`,
    );
    expect(wrapText(list, font, 50)).toBe(
      `\tA)\none\ntab\n- two\ntabs\n- 8\nspace\ns`,
    );
  });

  describe("When text is CJK", () => {
    // Fixture shared by the first two tests. The original author describes it as
    // Hangul + Hiragana/Kanji + Katakana + Han segments, glossed as
    // "Hello Hello World Hello Hi".
    const mixedScriptText = "μ•ˆλ…•ν•˜μ„Έμš”γ“γ‚“γ«γ‘γ―δΈ–η•Œο½ΊοΎοΎ†οΎγƒδ½ ε₯½";

    it("should break each CJK character when width is very small", () => {
      const wrapped = wrapText(mixedScriptText, font, 10);

      expect(wrapped).toBe(
        "μ•ˆ\nλ…•\nν•˜\nμ„Έ\nμš”\nこ\nγ‚“\nに\nけ\nは\nδΈ–\nη•Œ\nο½Ί\nン\nοΎ†\nチ\nハ\nδ½ \nε₯½",
      );
    });

    it("should break CJK text into longer segments when width is larger", () => {
      const wrapped = wrapText(mixedScriptText, font, 30);

      // measureText is mocked, so it's not precisely what would happen in prod
      expect(wrapped).toBe("μ•ˆλ…•ν•˜\nμ„Έμš”γ“\nんにけ\nγ―δΈ–η•Œ\nコンニ\nチハ你\nε₯½");
    });

    it("should handle a combination of CJK, latin, emojis and whitespaces", () => {
      const mixed = `a醫 醫 bb δ½ ε₯½ world-i-πŸ˜€πŸ—ΊπŸ”₯`;

      expect(wrapText(mixed, font, 150)).toBe(
        `a醫 醫 bb δ½ \nε₯½ world-i-πŸ˜€πŸ—Ί\nπŸ”₯`,
      );
      expect(wrapText(mixed, font, 50)).toBe(
        `a醫 醫\nbb δ½ \nε₯½\nworld\n-i-πŸ˜€\nπŸ—ΊπŸ”₯`,
      );
      expect(wrapText(mixed, font, 30)).toBe(
        `a醫\n醫\nbb\nδ½ ε₯½\nwor\nld-\ni-\nπŸ˜€\nπŸ—Ί\nπŸ”₯`,
      );
    });

    it("should break before and after a regular CJK character", () => {
      const input = "HelloたWorld";

      expect(wrapText(input, font, 50)).toBe("Hello\nた\nWorld");
      expect(wrapText(input, font, 60)).toBe("Helloた\nWorld");
    });

    it("should break before and after certain CJK symbols", () => {
      const input = "γ“γ‚“γ«γ‘γ―γ€ƒδΈ–η•Œ";

      expect(wrapText(input, font, 50)).toBe("こんにけは\nγ€ƒδΈ–η•Œ");
      expect(wrapText(input, font, 60)).toBe("こんにけは〃\nδΈ–η•Œ");
    });

    it("should break after, not before for certain CJK pairs", () => {
      const wrapped = wrapText("Hello γŸγ€‚", font, 70);

      expect(wrapped).toBe("Hello\nγŸγ€‚");
    });

    it("should break before, not after for certain CJK pairs", () => {
      const wrapped = wrapText("Helloγ€ŒγŸWorld」", font, 60);

      expect(wrapped).toBe("Hello\nγ€ŒγŸ\nWorld」");
    });

    it("should break after, not before for certain CJK character pairs", () => {
      const wrapped = wrapText("γ€ŒHelloγŸγ€World", font, 70);

      expect(wrapped).toBe("γ€ŒHello\nγŸγ€World");
    });

    it("should break Chinese sentences", () => {
      const sentence = `δΈ­ε›½δ½ ε₯½οΌθΏ™ζ˜―δΈ€δΈͺ桋试。 ζˆ‘δ»¬ζ₯ηœ‹ηœ‹οΌšδΊΊζ°‘εΈΒ₯1234γ€ŒεΎˆθ΄΅γ€ οΌˆζ‹¬ε·οΌ‰γ€ι€—ε·οΌŒε₯号。空格 ζ’θ‘Œγ€€ε…¨θ§’η¬¦ε·β€¦β€”`;

      expect(wrapText(sentence, font, 80)).toBe(
        `δΈ­ε›½δ½ ε₯½οΌθΏ™ζ˜―δΈ€\nδΈͺ桋试。 ζˆ‘δ»¬ζ₯ηœ‹ηœ‹οΌšδΊΊζ°‘\n币Β₯1234γ€ŒεΎˆ\n贡」 οΌˆζ‹¬ε·οΌ‰γ€ι€—ε·οΌŒ\nε₯号。空格 捒葌\n全角符号…—`,
      );
      expect(wrapText(sentence, font, 50)).toBe(
        `δΈ­ε›½δ½ ε₯½οΌ\nθΏ™ζ˜―δΈ€δΈͺζ΅‹\n试。 ζˆ‘δ»¬ζ₯ηœ‹\nηœ‹οΌšδΊΊζ°‘εΈ\nΒ₯1234\nγ€ŒεΎˆθ΄΅γ€ οΌˆζ‹¬ε·οΌ‰γ€\nι€—ε·οΌŒε₯\n号。空格\nζ’θ‘Œγ€€ε…¨θ§’\n符号…—`,
      );
    });

    it("should break Japanese sentences", () => {
      const sentence = `ζ—₯ζœ¬γ“γ‚“γ«γ‘γ―οΌγ“γ‚Œγ―γƒ†γ‚Ήγƒˆγ§γ™γ€‚ θ¦‹γ¦γΏγΎγ—γ‚‡γ†οΌšε††οΏ₯1234γ€Œι«˜γ„γ€ οΌˆζ‹¬εΌ§οΌ‰γ€θͺ­η‚Ήγ€ε₯点。 η©Ίη™½ ζ”Ήθ‘Œγ€€ε…¨θ§’θ¨˜ε·β€¦γƒΌ`;

      expect(wrapText(sentence, font, 80)).toBe(
        `ζ—₯ζœ¬γ“γ‚“γ«γ‘γ―οΌ\nγ“γ‚Œγ―γƒ†γ‚Ήγƒˆγ§\nす。 見てみましょ\nγ†οΌšε††οΏ₯1234\nγ€Œι«˜γ„γ€ οΌˆζ‹¬εΌ§οΌ‰γ€θͺ­\n点、ε₯点。 η©Ίη™½ ζ”Ήθ‘Œ\nε…¨θ§’θ¨˜ε·β€¦γƒΌ`,
      );
      expect(wrapText(sentence, font, 50)).toBe(
        `ζ—₯ζœ¬γ“γ‚“γ«\nγ‘γ―οΌγ“γ‚Œ\nγ―γƒ†γ‚Ήγƒˆγ§\nす。 見てみ\nγΎγ—γ‚‡γ†οΌš\n円\nοΏ₯1234\nγ€Œι«˜γ„γ€ οΌˆζ‹¬\n弧)、θͺ­\n点、ε₯点。 η©Ίη™½\nζ”Ήθ‘Œγ€€ε…¨θ§’\nθ¨˜ε·β€¦γƒΌ`,
      );
    });

    it("should break Korean sentences", () => {
      const sentence = `ν•œκ΅­ μ•ˆλ…•ν•˜μ„Έμš”! 이것은 ν…ŒμŠ€νŠΈμž…λ‹ˆλ‹€. 우리 보자: 원화₩1234γ€ŒλΉ„μ‹Έλ‹€γ€ (κ΄„ν˜Έ), μ‰Όν‘œ, λ§ˆμΉ¨ν‘œ. 곡백 μ€„λ°”κΏˆγ€€μ „κ°κΈ°ν˜Έβ€¦β€”`;

      expect(wrapText(sentence, font, 80)).toBe(
        `ν•œκ΅­ μ•ˆλ…•ν•˜μ„Έ\nμš”! 이것은 ν…Œ\nμŠ€νŠΈμž…λ‹ˆλ‹€. 우리 보자: 원\nν™”β‚©1234γ€ŒλΉ„\n싸닀」 (κ΄„ν˜Έ), μ‰Ό\nν‘œ, λ§ˆμΉ¨ν‘œ. 곡백 μ€„λ°”κΏˆγ€€μ „\nκ°κΈ°ν˜Έβ€¦β€”`,
      );
      expect(wrapText(sentence, font, 60)).toBe(
        `ν•œκ΅­ μ•ˆλ…•ν•˜\nμ„Έμš”! 이것\n은 ν…ŒμŠ€νŠΈμž…\nλ‹ˆλ‹€. 우리 보자:\n원화\nβ‚©1234\nγ€ŒλΉ„μ‹Έλ‹€γ€ (κ΄„ν˜Έ),\nμ‰Όν‘œ, 마침\nν‘œ. 곡백 μ€„λ°”κΏˆ\nμ „κ°κΈ°ν˜Έβ€¦β€”`,
      );
    });
  });

  describe("When text contains leading whitespaces", () => {
    const text = " \t Hello world";

    it("should preserve leading whitespaces", () => {
      expect(wrapText(text, font, 120)).toBe(" \t Hello\nworld");
    });

    it("should break and collapse leading whitespaces when line breaks", () => {
      expect(wrapText(text, font, 60)).toBe("\nHello\nworld");
    });

    it("should break and collapse leading whitespaces whe words break", () => {
      expect(wrapText(text, font, 30)).toBe("\nHel\nlo\nwor\nld");
    });
  });

  describe("When text contains trailing whitespaces", () => {
    // One very long word followed by a short token; shared by the last three tests.
    const longWordText = "Hippopotomonstrosesquippedaliophobia ??????";

    it("shouldn't add new lines for trailing spaces", () => {
      const input = "Hello whats up ";

      expect(wrapText(input, font, 190)).toBe(input);
    });

    it("should ignore trailing whitespaces when line breaks", () => {
      expect(wrapText(longWordText, font, 400)).toBe(
        "Hippopotomonstrosesquippedaliophobia\n??????",
      );
    });

    it("should not ignore trailing whitespaces when word breaks", () => {
      expect(wrapText(longWordText, font, 300)).toBe(
        "Hippopotomonstrosesquippedalio\nphobia ??????",
      );
    });

    it("should ignore trailing whitespaces when word breaks and line breaks", () => {
      expect(wrapText(longWordText, font, 180)).toBe(
        "Hippopotomonstrose\nsquippedaliophobia\n??????",
      );
    });
  });

  describe("When text doesn't contain new lines", () => {
    const text = "Hello whats up";

    // Table of container widths and the wrapping each one should produce.
    const cases = [
      {
        desc: "break all words when width of each word is less than container width",
        width: 70,
        res: `Hello\nwhats\nup`,
      },
      {
        desc: "break all characters when width of each character is less than container width",
        width: 15,
        res: `H\ne\nl\nl\no\nw\nh\na\nt\ns\nu\np`,
      },
      {
        desc: "break words as per the width",
        width: 130,
        res: `Hello whats\nup`,
      },
      {
        desc: "fit the container",
        width: 240,
        res: "Hello whats up",
      },
      {
        desc: "push the word if its equal to max width",
        width: 50,
        res: `Hello\nwhats\nup`,
      },
    ];

    for (const { desc, width, res: expected } of cases) {
      it(`should ${desc}`, () => {
        expect(wrapText(text, font, width)).toEqual(expected);
      });
    }
  });

  describe("When text contain new lines", () => {
    const text = `Hello\n whats up`;

    // Same table-driven shape as above, for input that already has a line break.
    const cases = [
      {
        desc: "break all words when width of each word is less than container width",
        width: 70,
        res: `Hello\n whats\nup`,
      },
      {
        desc: "break all characters when width of each character is less than container width",
        width: 15,
        res: `H\ne\nl\nl\no\n\nw\nh\na\nt\ns\nu\np`,
      },
      {
        desc: "break words as per the width",
        width: 140,
        res: `Hello\n whats up`,
      },
    ];

    for (const { desc, width, res: expected } of cases) {
      it(`should respect new lines and ${desc}`, () => {
        expect(wrapText(text, font, width)).toEqual(expected);
      });
    }
  });

  describe("When text is long", () => {
    const text = `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg break it now`;

    // The first word is longer than most of these widths, so it has to be split mid-word.
    const cases = [
      {
        desc: "fit characters of long string as per container width",
        width: 160,
        res: `hellolongtextthi\nsiswhatsupwithyo\nuIamtypingggggan\ndtypinggg break\nit now`,
      },
      {
        desc: "fit characters of long string as per container width and break words as per the width",
        width: 120,
        res: `hellolongtex\ntthisiswhats\nupwithyouIam\ntypingggggan\ndtypinggg\nbreak it now`,
      },
      {
        desc: "fit the long text when container width is greater than text length and move the rest to next line",
        width: 590,
        res: `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg\nbreak it now`,
      },
    ];

    for (const { desc, width, res: expected } of cases) {
      it(`should ${desc}`, () => {
        expect(wrapText(text, font, width)).toEqual(expected);
      });
    }
  });

  describe("Test parseTokens", () => {
    it("should tokenize latin", () => {
      const sentence = "Excalidraw is a virtual collaborative whiteboard";

      expect(parseTokens(sentence)).toEqual([
        "Excalidraw",
        " ",
        "is",
        " ",
        "a",
        " ",
        "virtual",
        " ",
        "collaborative",
        " ",
        "whiteboard",
      ]);

      const hyphenatedSentence =
        "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects";

      // Hyphenated words are split after the hyphen ("non-", "profit").
      expect(parseTokens(hyphenatedSentence)).toEqual([
        "Wikipedia",
        " ",
        "is",
        " ",
        "hosted",
        " ",
        "by",
        " ",
        "Wikimedia-",
        " ",
        "Foundation,",
        " ",
        "a",
        " ",
        "non-",
        "profit",
        " ",
        "organization",
        " ",
        "that",
        " ",
        "also",
        " ",
        "hosts",
        " ",
        "a",
        " ",
        "range-",
        "of",
        " ",
        "other",
        " ",
        "projects",
      ]);
    });

    it("should not tokenize number", () => {
      expect(parseTokens("99,100.99")).toEqual(["99,100.99"]);
    });

    it("should tokenize joined emojis", () => {
      const emojis = `πŸ˜¬πŸŒπŸ—ΊπŸ”₯β˜‚οΈπŸ‘©πŸ½β€πŸ¦°πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦πŸ‘©πŸΎβ€πŸ”¬πŸ³οΈβ€πŸŒˆπŸ§”β€β™€οΈπŸ§‘β€πŸ€β€πŸ§‘πŸ™…πŸ½β€β™‚οΈβœ…0οΈβƒ£πŸ‡¨πŸ‡ΏπŸ¦…`;

      expect(parseTokens(emojis)).toEqual([
        "😬",
        "🌍",
        "πŸ—Ί",
        "πŸ”₯",
        "β˜‚οΈ",
        "πŸ‘©πŸ½β€πŸ¦°",
        "πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦",
        "πŸ‘©πŸΎβ€πŸ”¬",
        "πŸ³οΈβ€πŸŒˆ",
        "πŸ§”β€β™€οΈ",
        "πŸ§‘β€πŸ€β€πŸ§‘",
        "πŸ™…πŸ½β€β™‚οΈ",
        "βœ…",
        "0️⃣",
        "πŸ‡¨πŸ‡Ώ",
        "πŸ¦…",
      ]);
    });

    it("should tokenize emojis mixed with mixed text", () => {
      const mixed = `😬a🌍bπŸ—ΊcπŸ”₯dβ˜‚οΈγ€ŠπŸ‘©πŸ½β€πŸ¦°γ€‹πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦εΎ·πŸ‘©πŸΎβ€πŸ”¬γ“πŸ³οΈβ€πŸŒˆμ•ˆπŸ§”β€β™€οΈgπŸ§‘β€πŸ€β€πŸ§‘hπŸ™…πŸ½β€β™‚οΈeβœ…f0️⃣gπŸ‡¨πŸ‡Ώ10πŸ¦…#hash`;

      expect(parseTokens(mixed)).toEqual([
        "😬",
        "a",
        "🌍",
        "b",
        "πŸ—Ί",
        "c",
        "πŸ”₯",
        "d",
        "β˜‚οΈ",
        "γ€Š",
        "πŸ‘©πŸ½β€πŸ¦°",
        "》",
        "πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦",
        "εΎ·",
        "πŸ‘©πŸΎβ€πŸ”¬",
        "こ",
        "πŸ³οΈβ€πŸŒˆ",
        "μ•ˆ",
        "πŸ§”β€β™€οΈ",
        "g",
        "πŸ§‘β€πŸ€β€πŸ§‘",
        "h",
        "πŸ™…πŸ½β€β™‚οΈ",
        "e",
        "βœ…",
        "f0️⃣g", // bummer, but ok, as we traded keycaps not breaking (less common) for hash and numbers not breaking (more common)
        "πŸ‡¨πŸ‡Ώ",
        "10", // nice! do not break the number, as it's by default matched by \p{Emoji}
        "πŸ¦…",
        "#hash", // nice! do not break the hash, as it's by default matched by \p{Emoji}
      ]);
    });

    it("should tokenize decomposed chars into their composed variants", () => {
      // each input character is in a decomposed form
      const decomposed = "cΜŒγ¦γ‚™aΜˆγ²γ‚šΞ΅Μα„ƒα…‘ΠΈΜ†α„’α…‘α†«";

      // Sanity-check the fixture itself before exercising parseTokens.
      expect(decomposed.normalize("NFC").length).toEqual(8);
      expect(decomposed).toEqual(decomposed.normalize("NFD"));

      const tokens = parseTokens(decomposed);

      expect(tokens.length).toEqual(8);
      expect(tokens).toEqual(["č", "で", "Γ€", "ぴ", "Ξ­", "λ‹€", "ΠΉ", "ν•œ"]);
    });

    it("should tokenize artificial CJK", () => {
      const text = `γ€Šι“εΎ·ηΆ“γ€‹ι†«-ι†«γ“γ‚“γ«γ‘γ―δΈ–η•ŒοΌμ•ˆλ…•ν•˜μ„Έμš”μ„Έκ³„οΌ›μš”γ€,λ‹€.λ‹€...원/달(((λ‹€)))[[1]]γ€š({((ν•œ))>)γ€›(γ€ŒγŸγ€)γŸβ€¦[Hello] \tγ€€WorldοΌŸγƒ‹γƒ₯ーヨーク・οΏ₯3700.55す。090-1234-5678οΏ₯1,000γ€œοΌ„5,000γ€Œη΄ ζ™΄γ‚‰γ—γ„οΌγ€γ€”ι‡θ¦γ€•οΌƒοΌ‘οΌšTaro君30οΌ…γ―γ€οΌˆγŸγͺγ°γŸοΌ‰γ€°οΏ₯110Β±οΏ₯570で20β„ƒγ€œ9:30γ€œ10:00【一η•ͺ】`;

      // Full token list recorded by the original author, for reference:
      // [
      //   'γ€Šι“', 'εΎ·', '碓》', '醫-',
      //   '醫', 'こ', 'γ‚“', 'に',
      //   'け', 'は', 'δΈ–', 'η•ŒοΌ',
      //   'μ•ˆ', 'λ…•', 'ν•˜', 'μ„Έ',
      //   'μš”', 'μ„Έ', '계;', 'μš”γ€,',
      //   'λ‹€.', 'λ‹€...', '원/', '달',
      //   '(((λ‹€)))', '[[1]]', 'γ€š({((ν•œ))>)γ€›', '(γ€ŒγŸγ€)',
      //   'γŸβ€¦', '[Hello]', ' ', '\t',
      //   'γ€€', 'World?', 'ニ', 'γƒ₯',
      //   'γƒΌ', 'ヨ', 'γƒΌ', 'ク・',
      //   'οΏ₯3700.55', 'す。', '090-', '1234-',
      //   '5678', 'οΏ₯1,000γ€œ', 'οΌ„5,000', 'γ€Œη΄ ',
      //   'ζ™΄', 'ら', 'し', 'い!」',
      //   '〔重', '要〕', 'οΌƒ', 'οΌ‘οΌš',
      //   'Taro', '君', '30οΌ…', 'は、',
      //   '(た', 'γͺ', 'ば', 'γŸοΌ‰',
      //   'γ€°', 'οΏ₯110Β±', 'οΏ₯570', 'で',
      //   '20β„ƒγ€œ', '9:30γ€œ', '10:00', '【一',
      //   'η•ͺ】'
      // ]
      const tokens = parseTokens(text);

      // Membership (not order) is asserted, group by group, in the original order.
      const expectedGroups: string[][] = [
        // Latin
        ["[[1]]", "[Hello]", "World?", "Taro"],
        // Chinese
        ["γ€Šι“", "εΎ·", "碓》", "醫-", "醫"],
        // Japanese
        [
          "こ",
          "γ‚“",
          "に",
          "け",
          "は",
          "δΈ–",
          "ク・",
          "η•ŒοΌ",
          "γŸβ€¦",
          "す。",
          "γƒ₯",
          "γ€Œη΄ ",
          "ζ™΄",
          "ら",
          "し",
          "い!」",
          "君",
          "は、",
          "(た",
          "γͺ",
          "ば",
          "γŸοΌ‰",
          "で",
          "【一",
          "η•ͺ】",
        ],
        // Korean
        [
          "μ•ˆ",
          "λ…•",
          "ν•˜",
          "μ„Έ",
          "μš”",
          "μ„Έ",
          "계;",
          "μš”γ€,",
          "λ‹€.",
          "λ‹€...",
          "원/",
          "달",
          "(((λ‹€)))",
          "γ€š({((ν•œ))>)γ€›",
          "(γ€ŒγŸγ€)",
        ],
        // Numbers and units
        [
          "οΏ₯3700.55",
          "090-",
          "1234-",
          "5678",
          "οΏ₯1,000γ€œ",
          "οΌ„5,000",
          "οΌ‘οΌš",
          "30οΌ…",
          "οΏ₯110Β±",
          "20β„ƒγ€œ",
          "9:30γ€œ",
          "10:00",
        ],
        // Punctuation and symbols
        [" ", "\t", "γ€€", "ニ", "γƒΌ", "ヨ", "γ€°", "οΌƒ"],
      ];

      for (const group of expectedGroups) {
        for (const expectedToken of group) {
          expect(tokens).toContain(expectedToken);
        }
      }
    });
  });
});