subNonStandardNames {Ecdat} | R Documentation |
sub(nonEnglishData["nonEnglish"], nonEnglishData["English"], x)
subNonStandardNames(x, standardCharacters=c(letters, LETTERS, ' ','.', ',', 0:9, '\"', "\'", '-', '_', '(', ')', '[', ']', '\n'), replacement='_', gsubList=list(list(pattern='\\\\\\\\|\\\\', replacement='\"')), removeSecondLine=TRUE, nonStandardNames=nonEnglishNames, ...)
x |
character vector in which non-standard characters and names are to be replaced
|
standardCharacters, replacement, gsubList, ... |
arguments passed to subNonStandardCharacters |
removeSecondLine |
logical: If TRUE, delete anything following "\n" and return it as an attribute "secondLine" |
nonStandardNames |
data.frame or character matrix with two columns: Replace any
substring of x that matches an entry in the first column with the corresponding entry in the second column |
1. If removeSecondLine is TRUE, delete anything following "\n" and save it as the attribute "secondLine".
2. x. <- subNonStandardCharacters(x, standardCharacters, replacement, ...)
3. Loop over all rows of nonStandardNames, substituting anything matching nonStandardNames[i, 1] with nonStandardNames[i, 2].
4. Eliminate leading and trailing blanks.
a character vector with all nonStandardCharacters replaced first by replacement and then, for any substring that matches the first column of nonStandardNames, by the corresponding entry in the second column.
Spencer Graves
sub
nonEnglishNames
subNonStandardCharacters
# Example for subNonStandardNames(): clean a vector of names and compare the
# result with the expected answer.
#
# The inputs exercise several cases: a plain name, a name with a garbled
# character, names containing a comma, a name with backslashes (the default
# gsubList replaces these with a double quote), and a name followed by a
# second line after "\n" (moved to the "secondLine" attribute when
# removeSecondLine = TRUE, the default).
Names <- c('Raul', 'Ra`l', 'Torres,Raul', 'Torres, Raul',
           "Robert C. \\Bobby\\\\", 'Ed \n --Vacancy')
# confusion in character sets can create
# names like Names[2]

# Load the default lookup table used for the nonStandardNames argument.
data(nonEnglishNames)

Name2 <- subNonStandardNames(Names)
Name2

# Expected result: cleaned names plus the "secondLine" attribute, which is
# NA for every element except the last one.
Name2. <- c('Raul', 'Raul', Names[3:4], 'Robert C. "Bobby"', 'Ed')
attr(Name2., 'secondLine') <- c(rep(NA, 5), ' --Vacancy')
Name2.

# TRUE when the computed and expected results agree.
all.equal(Name2, Name2.)