# paste(): join the arguments into one string, separated by a space by default.
paste("A dog", "is running")
## [1] "A dog is running"

# Two or more arguments may be supplied.
paste("A", "dog", "is", "running")
## [1] "A dog is running"

# The string given to `sep` is inserted between the arguments.
paste("A dog", "is running", sep = "/")
## [1] "A dog/is running"

# paste0(): join the arguments with nothing inserted between them.
paste0("A dog", "is running")
## [1] "A dogis running"
# sprintf(): build a string from a format specification.
sprintf("%f", pi)
## [1] "3.141593"

# %.2f prints a number with two digits after the decimal point.
sprintf("%sは%.2fです。", "円周率", pi)
## [1] "円周率は3.14です。"

# Arguments swapped on purpose: a character string is passed to %.2f, which
# sprintf() rejects. Wrapped in try() so the error is reported without
# aborting the rest of the script.
try(sprintf("%sは%.2fです。", pi, "円周率"))
## Error in sprintf("%sは%.2fです。", pi, "円周率"): invalid format '%.2f'; use format %s for character objects
# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# substr(): characters 3 through 5 of each element of x.
substr(x, start = 3, stop = 5)
## [1] "dog" "cat"

# substring(): everything from the 3rd character onwards.
substring(x, 3)
## [1] "dog is running" "cat is running"
# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# strsplit(): split each element on a space. The result is a list with one
# character vector per element of x.
strsplit(x, " ")
## [[1]]
## [1] "A" "dog" "is" "running"
##
## [[2]]
## [1] "A" "cat" "is" "running"

# Split on the letter "i"; the delimiter itself is dropped from the pieces.
strsplit(x, "i")
## [[1]]
## [1] "A dog " "s runn" "ng"
##
## [[2]]
## [1] "A cat " "s runn" "ng"
# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# sub(): replace only the first match of the pattern within each element.
sub(pattern = "n", replacement = "N", x)
## [1] "A dog is ruNning" "A cat is ruNning"

# gsub(): replace every match of the pattern within each element.
gsub(pattern = "n", replacement = "N", x)
## [1] "A dog is ruNNiNg" "A cat is ruNNiNg"

# chartr(): character-for-character translation; here it gives the same
# result as the gsub() call above.
chartr("n", "N", x)
## [1] "A dog is ruNNiNg" "A cat is ruNNiNg"
9.1.8 小文字、大文字に変換:tolower toupper
文字列を小文字に置き換えるのがtolower関数、大文字に置き換えるのがtoupper関数です。
小文字・大文字の変換
# tolower(): convert every letter to lower case.
tolower("A CAT IS RUNNING")
## [1] "a cat is running"

# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# toupper(): convert every letter to upper case.
toupper(x)
## [1] "A DOG IS RUNNING" "A CAT IS RUNNING"
# str_trim() (stringr): strip the whitespace surrounding the string.
str_trim(" x ")
## [1] "x"

# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# str_trunc(): shorten each element to 12 characters in total, cutting off
# the end and marking the cut with "...".
str_trunc(x, 12)
## [1] "A dog is ..." "A cat is ..."

# side = "left": cut off the beginning instead and put "..." in front.
str_trunc(x, 12, side = "left")
## [1] "...s running" "...s running"
# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# str_sub() (stringr): extract by position, here characters 3 through 5.
str_sub(x, start = 3, end = 5)
## [1] "dog" "cat"

# str_subset(): keep only the elements that contain the pattern.
str_subset(x, "cat")
## [1] "A cat is running"

# Both elements contain "is", so both are kept.
str_subset(x, "is")
## [1] "A dog is running" "A cat is running"

# No element contains "rat", so the result is an empty character vector.
str_subset(x, "rat")
## character(0)
# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# str_extract() (stringr): pull the matched pattern out of each element.
str_extract(x, "is")
## [1] "is" "is"

# Elements without a match yield NA.
str_extract(x, "dog")
## [1] "dog" NA

# str_extract_all(): pull out every match; the result is a list with one
# character vector per element of x.
str_extract_all(x, "n")
## [[1]]
## [1] "n" "n" "n"
##
## [[2]]
## [1] "n" "n" "n"
# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# str_match() (stringr): return the matched pattern as a character matrix,
# one row per element of x; rows without a match hold NA.
str_match(x, "dog")
##      [,1]
## [1,] "dog"
## [2,] NA

# str_match_all(): return every match; the result is a list with one
# character matrix per element of x.
str_match_all(x, "n")
## [[1]]
##      [,1]
## [1,] "n"
## [2,] "n"
## [3,] "n"
##
## [[2]]
##      [,1]
## [1,] "n"
## [2,] "n"
## [3,] "n"
# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# str_c() (stringr): join the arguments with no separator, like paste0().
str_c(x[1], x[2])
## [1] "A dog is runningA cat is running"

# With sep = " " the result matches paste().
str_c(x[1], x[2], sep = " ")
## [1] "A dog is running A cat is running"

# str_flatten(): collapse one character vector into a single string,
# like paste(v, collapse = "").
str_flatten(c("a", "dog", "is", "running"))
## [1] "adogisrunning"

# With collapse = " " the result matches paste(v, collapse = " ").
str_flatten(c("a", "dog", "is", "running"), collapse = " ")
## [1] "a dog is running"
# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# str_split() (stringr): split each element on the pattern; returns a list.
str_split(x, " ")
## [[1]]
## [1] "A" "dog" "is" "running"
##
## [[2]]
## [1] "A" "cat" "is" "running"

# str_split_fixed(): split into exactly 2 pieces at the first match;
# returns a character matrix with one row per element of x.
str_split_fixed(x, " ", 2)
##      [,1] [,2]
## [1,] "A"  "dog is running"
## [2,] "A"  "cat is running"

# str_split_i(): split on the pattern and keep only the 2nd piece.
str_split_i(x, " ", 2)
## [1] "dog" "cat"
# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# str_replace() (stringr): replace the first match of the pattern
# (2nd argument) in each element with the replacement (3rd argument).
str_replace(x, "running", "walking")
## [1] "A dog is walking" "A cat is walking"

# str_replace_all(): replace every match of the pattern.
str_replace_all(x, " ", ",")
## [1] "A,dog,is,running" "A,cat,is,running"
# str_to_lower() (stringr): convert to lower case.
str_to_lower("A DOG IS RUNNING")
## [1] "a dog is running"

# Print x (a character vector defined outside this snippet) for reference.
x
## [1] "A dog is running" "A cat is running"

# str_to_upper(): convert to upper case.
str_to_upper(x)
## [1] "A DOG IS RUNNING" "A CAT IS RUNNING"
とは言っても、正規表現をまるまる覚えるのは大変ですし、いちいち検索して正規表現でパターンを表現するのも場合によってはかなり大変です。rexパッケージ(Ushey, Hester, and Krzyzanowski 2021)はこのような複雑な正規表現を人にも理解しやすい形で作成できるようにするためのパッケージです。よく用いられる正規表現はshortcutsというオブジェクト(リストと同じように取り扱えます)に含まれていますし、rex関数を用いてより複雑な正規表現を作成することもできます。
Gagolewski, Marek. 2022. “stringi: Fast and Portable Character String Processing in R.” Journal of Statistical Software 103 (2): 1–59. https://doi.org/10.18637/jss.v103.i02.