Recently, I needed a way to convert JavaScript escaped characters to HTML/XML entities so that files could be saved in UTF-8 encoding without writing the special characters themselves to disk. Luckily, this was pretty easy. I used JavaScript's built-in function escape() to escape the input text, then used regular expressions to find each escaped value and replace it with its HTML/XML entity equivalent. For example, the text "Copyright Some Company ©" escapes to "Copyright%20Some%20Company%20%A9" using JavaScript's built-in function escape(). After converting the escaped characters to entities, the result is "Copyright&#x20;Some&#x20;Company&#x20;&#xA9;". When saved in a database and rendered as HTML on a webpage, it displays the original message "Copyright Some Company ©". Here's the code for anyone interested:
/**
 * Convert every character that escape() would encode into a hexadecimal
 * HTML/XML numeric character reference (for example "©" becomes "&#xA9;").
 *
 * Characters that escape() leaves alone (ASCII letters, digits and
 * "@*_+-./") pass through unchanged. Everything else, including spaces,
 * "&", "<" and ">", is emitted as "&#xHH;" or "&#xHHHH;". A surrogate pair
 * is combined into a single reference for the full code point
 * (for example U+1F600 becomes "&#x1F600;").
 *
 * @param {string} text - Input text; non-string values are coerced to a
 *     string by escape().
 * @returns {string} ASCII-only text with the escaped characters replaced by
 *     numeric character references.
 */
function normalizeText(text)
{
    // escape() yields "%XX" for code units below 0x100 and "%uXXXX" otherwise.
    var esc = escape(text);

    // The surrogate-pair alternative must come first so that a high/low pair
    // is consumed together instead of as two separate "%uXXXX" matches.
    var escapedCharsRegex = /%u(D[89AB][0-9A-Fa-f]{2})%u(D[C-F][0-9A-Fa-f]{2})|%u([0-9A-Fa-f]{4})|%([0-9A-Fa-f]{2})/g;

    return esc.replace(escapedCharsRegex, function (match, high, low, unit, byte)
    {
        if (high)
        {
            // Recombine the UTF-16 surrogate halves into one code point.
            var codePoint = (parseInt(high, 16) - 0xD800) * 0x400
                + (parseInt(low, 16) - 0xDC00) + 0x10000;
            return '&#x' + codePoint.toString(16).toUpperCase() + ';';
        }

        // NOTE: an unpaired surrogate still falls through here and is emitted
        // as its own reference (e.g. "&#xD83D;"), which is not a valid
        // character reference; callers should pass well-formed text.
        return '&#x' + (unit || byte) + ';';
    });
}
I understand there may be a more efficient way to do this, but this code does work in a pinch. Please let me know in the comments if you have any suggestions for improving this code.