This release is 27 versions behind 1.0.5 — the latest version of @std/html. Jump to latest
Works with
• JSR Score: 94% • This package works with Cloudflare Workers, Node.js, Deno, Bun, and browsers.




Downloads: 6,448/wk
• Published 2 years ago (0.210.0)
Functions for HTML, such as escaping or unescaping HTML entities
// Copyright 2018-2023 the Deno authors. All rights reserved. MIT license.
// This module is browser compatible.

/** Maps an entity as written in HTML (e.g. `&amp;`) to the text it stands for. */
export type EntityList = Record<string, string>;

// Characters that must be escaped, paired with the entity emitted for each.
const rawToEntityEntries = [
  ["&", "&amp;"],
  ["<", "&lt;"],
  [">", "&gt;"],
  ['"', "&quot;"],
  ["'", "&#39;"],
] as const;

// Entities understood by `unescape` by default: the inverse of the table
// above plus two common named entities that `escape` never produces.
const defaultEntityList: EntityList = Object.fromEntries([
  ...rawToEntityEntries.map(([raw, entity]) => [entity, raw]),
  ["&apos;", "'"],
  ["&nbsp;", "\xa0"],
]);

const rawToEntity = new Map<string, string>(rawToEntityEntries);

// Character class matching any single character that needs escaping.
const rawRe = new RegExp(`[${[...rawToEntity.keys()].join("")}]`, "g");

/**
 * Escapes text for safe interpolation into HTML text content and quoted attributes
 *
 * @example
 * ```ts
 * import { escape } from "@std/html/entities";
 * import { assertEquals } from "@std/assert/assert_equals";
 *
 * assertEquals(escape("<>'&AA"), "&lt;&gt;&#39;&amp;AA");
 *
 * // characters that don't need to be escaped will be left alone,
 * // even if named HTML entities exist for them
 * assertEquals(escape("þð"), "þð");
 * ```
 *
 * @param str The raw text to escape.
 * @returns `str` with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
export function escape(str: string): string {
  // `rawRe` only matches keys of `rawToEntity`; the fallback keeps the type honest.
  return str.replace(rawRe, (m) => rawToEntity.get(m) ?? m);
}

/** Options for {@linkcode unescape}. */
export type UnescapeOptions = { entityList: EntityList };

const defaultUnescapeOptions: UnescapeOptions = {
  entityList: defaultEntityList,
};

const MAX_CODE_POINT = 0x10ffff;

// Numeric character references. Group 1 captures decimal digits, group 2
// captures hex digits. The HTML spec accepts both `&#x` and `&#X`.
const NUMERIC_ENTITY_SOURCE = "&#([0-9]+);|&#[xX]([0-9A-Fa-f]+);";

// One compiled regex per entity list object. The cache is keyed by identity,
// so mutating a list after its first use is not picked up.
const entityListRegexCache = new WeakMap<EntityList, RegExp>();

/** Escapes regex metacharacters so `text` is matched literally. */
function escapeRegExp(text: string): string {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Builds a single regex matching every named entity in `entityList` as well
 * as decimal and hexadecimal numeric character references.
 */
function buildEntityRegex(entityList: EntityList): RegExp {
  const named = Object.keys(entityList)
    // An empty key would match the empty string at every position.
    .filter((key) => key.length > 0)
    // Longest first so a key is never shadowed by a shorter key that is its prefix.
    .sort((a, b) => b.length - a.length)
    .map(escapeRegExp);

  // Named entities come first so an explicit list entry such as `&#39;`
  // takes precedence over generic numeric decoding.
  const source = named.length > 0
    ? `(?:${named.join("|")})|${NUMERIC_ENTITY_SOURCE}`
    : NUMERIC_ENTITY_SOURCE;

  return new RegExp(source, "g");
}

/**
 * Unescapes HTML entities in text
 *
 * Decoding happens in a single pass, so the output of one replacement is
 * never decoded again: `&amp;#65;` becomes `&#65;`, not `A`.
 *
 * @example
 * ```ts
 * import { unescape } from "@std/html/entities";
 * import { assertEquals } from "@std/assert/assert_equals";
 *
 * // default options (only handles &<>'" and numeric entities)
 * assertEquals(unescape("&lt;&gt;&apos;&amp;&#65;&#x41;"), "<>'&AA");
 * assertEquals(unescape("&thorn;&eth;"), "&thorn;&eth;");
 *
 * // using the full named entity list from the HTML spec (~47K unminified)
 * import entityList from "@std/html/named_entity_list.json" assert { type: "json" };
 * assertEquals(unescape("&thorn;&eth;", { entityList }), "þð");
 * ```
 *
 * @param str The text containing entities to decode.
 * @param options Optional custom `entityList`; defaults to the small built-in list.
 * @returns `str` with recognized named and numeric entities replaced.
 */
export function unescape(
  str: string,
  options: Partial<UnescapeOptions> = {},
): string {
  // `??` (rather than object spread) so an explicit `entityList: undefined`
  // still falls back to the default list.
  const entityList = options.entityList ?? defaultUnescapeOptions.entityList;

  let entityRe = entityListRegexCache.get(entityList);

  if (!entityRe) {
    entityRe = buildEntityRegex(entityList);
    entityListRegexCache.set(entityList, entityRe);
  }

  return str.replace(
    entityRe,
    (match: string, dec: string | undefined, hex: string | undefined) => {
      if (dec !== undefined) return codePointStrToChar(dec, 10);
      if (hex !== undefined) return codePointStrToChar(hex, 16);
      // Neither numeric group matched, so `match` is one of the list's keys.
      return entityList[match] ?? match;
    },
  );
}

/**
 * Converts the digits of a numeric character reference to a character.
 * Values above the maximum Unicode code point yield U+FFFD.
 */
function codePointStrToChar(codePointStr: string, radix: number): string {
  const codePoint = parseInt(codePointStr, radix);

  return codePoint > MAX_CODE_POINT
    ? "\uFFFD"
    : String.fromCodePoint(codePoint);
}