Sunday, September 18, 2005

 

Text Processing -- Regex Version of Symbol Replacement

This time, with help from Shane Stanley, I have come up with a solution to the symbol-tagging problem using regular expressions (regex). Here's the new version of the function:
/**
 * Walks the paragraphs of theText from last to first, runs a Greek-glyph
 * search/replace over each paragraph's contents, and writes the result back
 * only when the replace actually changed the string.
 *
 * @param theText  Object exposing a `paragraphs` collection whose items have
 *                 `contents` and `characters.itemByRange()`.
 * @returns {Boolean} Always true.
 */
function insertSymTags(theText) {
 // Alternation of the Greek glyphs to look for; "g" so every hit in a paragraph is replaced.
 var glyphPattern = new RegExp("κ|µ|α|β|δ|Δ|ζ|γ|ε|λ","g");
 var index = theText.paragraphs.length;
 while (index-- > 0) {
  var before = theText.paragraphs[index].contents;
  // "$&" stands for the matched text itself, so as written the result equals the input.
  // NOTE(review): presumably tag markup around $& was lost when this was published -- confirm.
  var after = before.replace(glyphPattern, "$&");
  if (before == after) {
   continue; // nothing replaced, leave the paragraph untouched
  }
  if (after.slice(-1) == "\r") {
   // Overwrite everything except the final character so the trailing return is not rewritten.
   theText.paragraphs[index].characters.itemByRange(0, -2).contents = after.slice(0, -1);
  } else {
   theText.paragraphs[index].contents = after;
  }
 }
 return true;
}
While I was at it, I also wrote a list clean-up function using regex:
/**
 * Renumbers the lines of every paragraph whose applied paragraph style name
 * contains "References". Each such paragraph is split on "\n"; any leading
 * digits, dots and whitespace on a line are replaced by "<line number>. ",
 * counting from 1 within the paragraph. Paragraphs are written back only
 * when the text actually changed.
 *
 * @param theText  Object exposing a `paragraphs` collection that supports
 *                 `everyItem().appliedParagraphStyle`, indexed access,
 *                 `contents` and `characters.itemByRange()`.
 * @returns {Boolean} Always true.
 */
function validateLists(theText) {
 var myStyles = theText.paragraphs.everyItem().appliedParagraphStyle;
 // Leading digits, dots, then whitespace -- but never "\r". With plain \s a
 // blank or whitespace-only last line would have its paragraph return
 // swallowed, and the paragraph would be rewritten without its terminator.
 var myRE = new RegExp("^[0-9]*\\.*[^\\S\\r]*");
 for (var j = myStyles.length - 1; j >= 0; j--) {
  if (myStyles[j].name.indexOf("References") == -1) {
   continue; // not a References-styled paragraph
  }
  var myText = theText.paragraphs[j].contents;
  var myLines = myText.split("\n");
  for (var k = 0; k < myLines.length; k++) {
   myLines[k] = myLines[k].replace(myRE, String(k + 1) + ". ");
  }
  var myNewText = myLines.join("\n");
  if (myText == myNewText) {
   continue; // numbering already correct
  }
  if (myNewText.slice(-1) == "\r") {
   // Overwrite everything except the final character so the trailing return is not rewritten.
   theText.paragraphs[j].characters.itemByRange(0, -2).contents = myNewText.slice(0, -1);
  } else {
   theText.paragraphs[j].contents = myNewText;
  }
 }
 return true;
}
Phew. Let's hope I can build on these early successes and start using Regex for more of my text processing scripts.

Comments: Post a Comment

<< Home

This page is powered by Blogger. Isn't yours?