JavaScript code to clean Microsoft Word content
Microsoft Word creates awful HTML code. If you need to use it on a web page (especially in an HTML editor), you will want to clean it up a bit. Here’s a JavaScript function that will take a string of text copied from Microsoft Word, and return it minus all the extraneous formatting that Word adds:
/**
 * Strips the extraneous markup Microsoft Word adds to pasted HTML.
 *
 * The content is parsed into a detached <div> so that every element's
 * `class` and `style` attributes can be removed through the DOM. The
 * re-serialised HTML is then scrubbed with regular expressions that delete:
 *   - XML processing instructions (`<?xml ...>`), including the
 *     `<!--?xml ...-->` form that HTML parsers rewrite them into;
 *   - Office/VML namespaced tags (`<o:...>`, `<v:...>`);
 *   - non-breaking spaces (`&nbsp;` entities and raw U+00A0 characters);
 *   - SPAN, FONT and STRONG tags (their text content is kept);
 *   - empty paragraphs;
 *   - H1-H6 tags (their text content is kept).
 *
 * Requires a global `document` (browser environment).
 *
 * @param {string} wordContent - HTML copied from Microsoft Word.
 * @returns {string} The cleaned HTML.
 */
function cleanWordContent(wordContent)
{
  // NOTE(security): assigning markup to innerHTML parses it as HTML. Review
  // (or sanitise first) before calling this with untrusted content.
  const wordDiv = document.createElement("div");
  wordDiv.innerHTML = wordContent;

  // Standard DOM replacement for the IE-only `wordDiv.all` collection and the
  // IE-only `removeAttribute("className", "", 0)` call.
  for (const element of wordDiv.querySelectorAll("*")) {
    element.removeAttribute("class");
    element.removeAttribute("style");
  }

  // Applied in order. All patterns are case-insensitive because browsers
  // differ in the case they use when serialising tag names, and `\b` stops a
  // short tag name from matching a longer one (e.g. P vs PRE).
  const wordMarkupPatterns = [
    /<(?:!--)?\\?\?xml[^>]*>/gi,
    /<\/?[ov]:[^>]*>/gi,
    // Must run before the empty-paragraph pattern so `<p>&nbsp;</p>` is caught.
    /&nbsp;|\u00a0/g,
    /<\/?(?:span|font|strong)\b[^>]*>/gi,
    /<\/?p\b[^>]*><\/p>/gi,
    /<\/?h[1-6]\b[^>]*>/gi,
  ];

  let cleanedContent = String(wordDiv.innerHTML);
  for (const pattern of wordMarkupPatterns) {
    cleanedContent = cleanedContent.replace(pattern, "");
  }
  return cleanedContent;
}
Yair
for (var i=0;i {
must be an error...