16 Rcpp
16.1 Getting started with C++
Q: With the basics of C++ in hand, it’s now a great time to practice by reading and writing some simple C++ functions. For each of the following functions, read the code and figure out what the corresponding base R function is. You might not understand every part of the code yet, but you should be able to figure out the basics of what the function does.
double f1(NumericVector x) { int n = x.size(); double y = 0; for(int i = 0; i < n; ++i) { y += x[i] / n; } return y; } NumericVector f2(NumericVector x) { int n = x.size(); NumericVector out(n); out[0] = x[0]; for(int i = 1; i < n; ++i) { out[i] = out[i - 1] + x[i]; } return out; } bool f3(LogicalVector x) { int n = x.size(); for(int i = 0; i < n; ++i) { if (x[i]) return true; } return false; } int f4(Function pred, List x) { int n = x.size(); for(int i = 0; i < n; ++i) { LogicalVector res = pred(x[i]); if (res[0]) return i + 1; } return 0; } NumericVector f5(NumericVector x, NumericVector y) { int n = std::max(x.size(), y.size()); NumericVector x1 = rep_len(x, n); NumericVector y1 = rep_len(y, n); NumericVector out(n); for (int i = 0; i < n; ++i) { out[i] = std::min(x1[i], y1[i]); } return out; }
A: The R equivalents are:
- f1:
mean()
- f2:
cumsum()
- f3:
any()
- f4:
Position()
- f5:
pmin()
- f1:
Q: To practice your function writing skills, convert the following functions into C++. For now, assume the inputs have no missing values.
all()
bool allC(LogicalVector x) { int n = x.size(); for(int i = 0; i < n; ++i) { if (!x[i]) return false; } return true; }
cumprod()
,cummin()
,cummax()
.
NumericVector cumprodC(NumericVector x) { int n = x.size(); NumericVector out(n); out[0] = x[0]; for(int i = 1; i < n; ++i) { out[i] = out[i - 1] * x[i]; } return out; } NumericVector cumminC(NumericVector x) { int n = x.size(); NumericVector out(n); out[0] = x[0]; for(int i = 1; i < n; ++i) { out[i] = std::min(out[i - 1], x[i]); } return out; } NumericVector cummaxC(NumericVector x) { int n = x.size(); NumericVector out(n); out[0] = x[0]; for(int i = 1; i < n; ++i) { out[i] = std::max(out[i - 1], x[i]); } return out; }
diff()
. Start by assuming lag 1, and then generalise for lagn
.
NumericVector diffC(NumericVector x){ int n = x.size(); NumericVector out(n - 1); for(int i = 1; i < n; i++){ out[i - 1] = x[i] - x[i - 1]; } return out ; } NumericVector difflagC(NumericVector x, int lag){ int n = x.size(); NumericVector out(n - lag); for(int i = lag; i < n; i++){ out[i - lag] = x[i] - x[i - lag]; } return out; }'
range
.
NumericVector rangeC(NumericVector x){ double omin, omax; int n = x.size(); NumericVector out(2); omin = x[0]; omax = x[0]; for(int i = 1; i < n; i++){ omin = std::min(x[i], omin); omax = std::max(x[i], omax); } out[0] = omin; out[1] = omax; return out;
var
. Read about the approaches you can take on wikipedia. Whenever implementing a numerical algorithm, it’s always good to check what is already known about the problem.
16.2 Missing values
Q: Rewrite any of the functions from the first exercise to deal with missing values. If
na.rm
is true, ignore the missing values. Ifna.rm
is false, return a missing value if the input contains any missing values. Some good functions to practice with aremin()
,max()
,range()
,mean()
, andvar()
.A:
NumericVector minC(NumericVector x, bool narm){ int n = x.size(); LogicalVector index = is_na(x); NumericVector omin(1); bool na_check = false; bool na_check_all = true; for(int i = 0; i<n; i++){ if (index[i]) na_check = true; } for(int i = 0; i<n; i++){ if (!index[i]) na_check_all = false; } if (narm) { for(int i = n-1; i >= 0; i--){ if (!index[i]) omin[0] = x[i]; } for(int i = 1; i < n; i++) { if (!index[i]) omin[0] = std::min(x[i], omin[0]); } if (na_check_all) { omin[0] = NA_REAL; } } else if (na_check) { omin[0] = NA_REAL; } else { omin[0] = x[0]; for(int i = 1; i < n; i++){ omin = std::min(x[i], omin[0]); } } return omin; } NumericVector maxC(NumericVector x, bool narm){ int n = x.size(); LogicalVector index = is_na(x); NumericVector omax(1); bool na_check = false; bool na_check_all = true; for(int i = 0; i<n; i++){ if (index[i]) na_check = true; } for(int i = 0; i<n; i++){ if (!index[i]) na_check_all = false; } if (narm) { for(int i = n-1; i >= 0; i--){ if (!index[i]) omax[0] = x[i]; } for(int i = 1; i < n; i++) { if (!index[i]) omax[0] = std::max(x[i], omax[0]); } if (na_check_all) { omax[0] = NA_REAL; } } else if (na_check) { omax[0] = NA_REAL; } else { omax[0] = x[0]; for(int i = 1; i < n; i++){ omax = std::max(x[i], omax[0]); } } return omax; } NumericVector rangeC(NumericVector x, bool narm){ NumericVector out(2); NumericVector omin(1); NumericVector omax(1); omin = minC(x, narm); omax = maxC(x, narm); out[0] = omin[0]; out[1] = omax[0]; return out; } } NumericVector meanC(NumericVector x, bool narm){ int n = x.size(); LogicalVector index = is_na(x); bool na_check = false; bool na_check_all = true; int n_corrected = 0; NumericVector out(1); for(int i = 0; i<n; i++){ if (index[i]) na_check = true; } for(int i = 0; i<n; i++){ if (!index[i]) na_check_all = false; } for(int i = 0; i<n; i++){ if (!index[i]) n_corrected++; } /* narm = T */ if (narm){ if (na_check_all) { out[0] = NA_REAL; } else { out[0] = 0; for(int i = 0; i < n; ++i) { if (!index[i]) out[0] += x[i] / n_corrected; } } } /* narm = F */ if (!narm){ if (na_check) { out[0] = NA_REAL; } else { for(int i = 0; i < n; ++i) { out[0] += x[i] / n; } } } return out; }
Q: Rewrite
cumsum()
anddiff()
so they can handle missing values. Note that these functions have slightly more complicated behaviour.A:
NumericVector cumsumC(NumericVector x) { int n = x.size(); NumericVector out(n); LogicalVector index = is_na(x); out[0] = x[0]; for(int i = 1; i < n; ++i) { if (index[i - 1]) { out[i] = NA_REAL; } else{ out[i] = out[i - 1] + x[i]; } } return out; } NumericVector difflagC(NumericVector x, int lag){ int n = x.size(); NumericVector out(n - lag); LogicalVector index = is_na(x); for(int i = lag; i < n; i++){ if ((index[i]) || (index[i - lag])) { out[i - lag] = NA_REAL; } else { out[i - lag] = x[i] - x[i - lag]; } } return out; }
16.3 The STL
To practice using the STL algorithms and data structures, implement the following using R functions in C++, using the hints provided:
median.default()
usingpartial_sort
.#include <algorithm> #include <Rcpp.h> using namespace Rcpp; // [[Rcpp::export]] double medianC(NumericVector x) { int n = x.size(); double out; if (n % 2 == 0){ std::partial_sort (x.begin(), x.begin() + n / 2 + 1, x.end()); out = (x[n / 2 - 1] + x[n / 2]) / 2; } else { std::partial_sort (x.begin(), x.begin() + (n + 1) / 2, x.end()); out = x[(n + 1) / 2 - 1]; } return out; }
%in%
usingunordered_set
and thefind()
orcount()
methods.unique()
using anunordered_set
(challenge: do it in one line!).// [[Rcpp::plugins(cpp11)]] #include <Rcpp.h> #include <unordered_set> using namespace Rcpp; // [[Rcpp::export]] NumericVector uniqueC(NumericVector x) { std::unordered_set<int> seen; int n = x.size(); std::vector<double> out; for (int i = 0; i < n; ++i) { if (seen.insert(x[i]).second) out.push_back(x[i]); } return wrap(out); }
min()
usingstd::min()
, ormax()
usingstd::max()
.#include <Rcpp.h> using namespace Rcpp; // [[Rcpp::export]] double minC(NumericVector x){ int n = x.size(); double out = x[0]; for (int i = 0; i < n; i++){ out = std::min(out, x[i]); } return out; }
which.min()
usingmin_element
, orwhich.max()
usingmax_element
.#include <Rcpp.h> #include <algorithm> #include <iterator> using namespace Rcpp; // [[Rcpp::export]] double which_minC(NumericVector x){ int out; out = std::distance(x.begin(),std::min_element(x.begin(),x.end())); out++; return out; }
setdiff()
,union()
, andintersect()
for integers using sorted ranges andset_union
,set_intersection
andset_difference
.