on ReplaceByChunks(aString, aSubString, aReplacement) ----------------
  -- INPUT:  <aString> is the main string to search in
  --         <aSubString> is a string which may appear in <aString>
  --         <aReplacement> is a string which is to replace all
  --         occurrences of <aSubString> in <aString>
  -- OUTPUT: returns an updated version of <aString> where all
  --         occurrences of <aSubString> have been replaced by
  --         <aReplacement>.  The text is scanned once, from left to
  --         right; text inserted as a replacement is never searched
  --         again, so <aReplacement> may safely contain <aSubString>.
  --
  -- NOTE:   On Macintosh, it can help to break a long string down
  --         into shorter strings before performing Search-and-Replace
  --         on a string containing a large number of occurrences.
  --         Empirical tests suggest that the optimal string length is
  --         between 2400 and 7200 characters.  4096 was chosen as a
  --         power of 2 in the middle of this range.  You may wish to
  --         confirm that this figure is appropriate on your own
  --         suite of test machines.
  --------------------------------------------------------------------

  -- Nothing to look for: hand the string back untouched
  if aSubString = "" then
    return aString
  end if

  tOutput       = ""
  tChunkLength  = 4096
  tLength       = the number of chars of aString
  tLengthAdjust = the number of chars of aSubString - 1

  -- tPending holds text which has been read from aString but not yet
  -- copied to tOutput.  It never grows much beyond one chunk, so the
  -- search and delete operations below always work on a short string.
  tPending = ""
  tStart   = 1

  repeat while tStart <= tLength
    tEnd     = tStart + tChunkLength - 1
    tPending = tPending & chars(aString, tStart, tEnd)
    tStart   = tEnd + 1

    -- Replace every occurrence that is completely inside tPending
    repeat while TRUE
      tOffset = offset(aSubString, tPending)
      if tOffset = 0 then
        exit repeat
      end if

      if tOffset > 1 then
        -- Copy the intervening text first
        put chars(tPending, 1, (tOffset - 1)) after tOutput
      end if

      put aReplacement after tOutput
      delete char 1 to (tOffset + tLengthAdjust) of tPending
    end repeat

    -- An occurrence that straddles this chunk and the next one must
    -- begin within the last tLengthAdjust characters of tPending.
    -- Hold those characters back for the next pass and flush the rest.
    tFlush = the number of chars of tPending - tLengthAdjust
    if tFlush > 0 then
      put chars(tPending, 1, tFlush) after tOutput
      delete char 1 to tFlush of tPending
    end if
  end repeat

  -- Whatever is still held back cannot contain a complete occurrence
  put tPending after tOutput

  return tOutput
end ReplaceByChunks



on ReplaceAll(aString, aSubString, aReplacement) ---------------------
  -- INPUT:  <aString> is the main string to search in
  --         <aSubString> is a string which may appear in <aString>
  --         <aReplacement> is a string which is to replace all
  --         occurrences of <aSubString> in <aString>
  -- OUTPUT: returns an updated version of <aString> where all
  --         occurrences of <aSubString> have been replaced by
  --         <aReplacement>.  If <aSubString> is empty, <aString> is
  --         returned unchanged.
  --------------------------------------------------------------------

  if aSubString = "" then
    return aString
  end if

  tTreatedString = ""
  tLengthAdjust  = the number of chars of aSubString - 1

  -- Each pass consumes the start of aString up to and including the
  -- next occurrence, so text that has already been treated (including
  -- the inserted replacements) is never searched again.
  repeat while TRUE
    tOffset = offset(aSubString, aString)

    if tOffset = 0 then
      -- There are no more occurrences.  Add the remaining text
      -- after the last replaced occurrence.
      put aString after tTreatedString
      return tTreatedString
    end if

    -- There is at least one more occurrence...
    if tOffset > 1 then
      -- ... but there is intervening text to copy first
      put chars(aString, 1, (tOffset - 1)) after tTreatedString
    end if

    put aReplacement after tTreatedString
    delete char 1 to (tOffset + tLengthAdjust) of aString
  end repeat
end ReplaceAll



on FindOccurrences(aSourceText, aListOfStrings) ----------------------
  -- INPUT:  <aSourceText> a string to be searched
  --         <aListOfStrings> a string or list of strings to find in
  --         aSourceText
  -- OUTPUT: a list of two-item lists, where the first item in
  --         each sub-list is the start character for one of the
  --         occurrences of a string in aListOfStrings, and the
  --         second item is the last character in that occurrence.
  --         Results are grouped by search string, in the order the
  --         strings appear in aListOfStrings.
  --         The sub-lists for different search strings may overlap:
  --         EXAMPLE: put FindOccurrences("Forever", ["for", "ore"])
  --         -- [[1, 3], [2, 4]]
  --         Occurrences of the same search string never overlap:
  --         the search resumes after the end of each match.
  --         Returns #invalidString if aSourceText is not a string,
  --         and #invalidList if aListOfStrings is neither a string
  --         nor a list.
  --------------------------------------------------------------------

  tOccurrencesList = []

  if not stringP(aSourceText) then
    return #invalidString
  end if

  case ilk(aListOfStrings) of
    #list, #propList:
      -- continue
    #string:
      aListOfStrings = list(aListOfStrings)
    otherwise:
      return #invalidList
  end case

  tCount = aListOfStrings.count()
  repeat with i = 1 to tCount
    tSearchString = aListOfStrings[i]

    -- Ignore entries which are not strings
    if not stringP(tSearchString) then
      next repeat
    end if

    -- An empty string has no occurrences to report.  Skipping it
    -- here keeps the loop below independent of how offset() treats
    -- an empty search pattern.
    if tSearchString = "" then
      next repeat
    end if

    -- Work on a copy which is shortened after each match.
    -- tPreviousChars counts the characters already removed from the
    -- copy, so positions can be reported relative to aSourceText.
    tSourceCopy    = aSourceText
    tPreviousChars = 0
    tLengthAdjust  = the number of chars of tSearchString - 1

    repeat while TRUE
      tOffset = offset(tSearchString, tSourceCopy)

      if tOffset then
        tFirstChar     = tPreviousChars + tOffset
        tLastChar      = tOffset + tLengthAdjust -- relative to the copy
        tPreviousChars = tPreviousChars + tLastChar -- now absolute
        tOccurrencesList.append([tFirstChar, tPreviousChars])

        delete char 1 to tLastChar of tSourceCopy

      else
        exit repeat
      end if
    end repeat
  end repeat

  return tOccurrencesList
end FindOccurrences



on SubstituteStrings(aParentString, aChildStringList) ----------------
  -- INPUT:  <aParentString> is the string to modify
  --         <aChildStringList> is a property list with the format:
  --         ["^0": "replacement string", ...]
  -- ACTION: Replaces the strings which appear as properties in
  --         <aChildStringList> by the values associated with those
  --         properties.  The properties are treated one after the
  --         other, from the last to the first, and each pass scans
  --         the result of the previous pass.  A placeholder that
  --         appears inside a replacement value may therefore be
  --         substituted by a later pass.
  -- OUTPUT: a modified version of <aParentString>.  If
  --         <aChildStringList> is not a property list,
  --         <aParentString> is returned unchanged.
  --------------------------------------------------------------------

  if ilk(aChildStringList) <> #propList then
    return aParentString
  end if

  i = aChildStringList.count()
  repeat while i
    tTempString   = ""
    tDummyString  = aChildStringList.getPropAt(i)
    tReplacement  = aChildStringList[i]
    tLengthAdjust = the number of chars in tDummyString - 1

    -- Move text from the start of aParentString to tTempString,
    -- swapping each occurrence of tDummyString for tReplacement
    repeat while TRUE
      tOffset = offset(tDummyString, aParentString)

      if not tOffset then
        -- No more occurrences: put the treated text back in front
        -- of whatever remains
        aParentString = tTempString&aParentString
        exit repeat

      else if tOffset - 1 then
        -- Copy the text which precedes the occurrence
        tTempString = tTempString&chars(aParentString, 1, tOffset - 1)
      end if

      tTempString = tTempString&tReplacement
      delete char 1 to (tOffset + tLengthAdjust) of aParentString
    end repeat

    i = i - 1
  end repeat

  return aParentString
end SubstituteStrings