How to use R in Stata

If you need to run an R program from within Stata, you can use the following template and adapt it to your needs.

* Template: run an R script from Stata.
* Steps: create data, save it as .dta, write an R script to disk, run it with
* R CMD BATCH, then load the file produced by R back into Stata.

* Set working directory and reset the session
clear
set more off
log close _all
cd your_working_directory

* Create example data: 100 observations of x, y, z with correlation matrix c
* (the rows of a Stata matrix literal are separated by a backslash)
set obs 100
matrix c = (1, -.5, 0 \ -.5, 1, .4 \ 0, .4, 1)
corr2data x y z, corr(c)

* Export data in Stata (.dta) format; "replace" lets the script be rerun
* NOTE: R's foreign::read.dta only reads Stata formats up to version 12, so
* on a newer Stata use "saveold" here instead of "save"
quietly: save "testout.dta", replace
quietly: file close _all

* R must work in the same directory as Stata; R paths use forward slashes
local rwd = subinstr(`"`c(pwd)'"', "\", "/", .)

* Write the R code you want to use (dependencies: foreign)
* The R script reads testout.dta, adds x2 = 2 * x and writes testin.dta
quietly: file open rcode using test.R, write replace
quietly: file write rcode ///
    `"setwd("`rwd'")"' _newline ///
    `"library(foreign)"' _newline ///
    `"data <- data.frame(read.dta("testout.dta"))"' _newline ///
    `"data2 <- transform(data, x2 = x * 2)"' _newline ///
    `"write.dta(data2, "testin.dta")"'
quietly: file close rcode

* Run R (adjust the path to the R executable installed on your computer)
quietly: shell "C:\Program Files\R\R-2.15.1\bin\x64\R.exe" CMD BATCH test.R

* Read revised data back into Stata
quietly: use "testin.dta", clear
summarize

* Clean up the intermediate files
rm testout.dta
rm test.R
rm .RData

From Thomas Blanchet's GitHub

// Example: interpolate a tabulated income distribution with the R package
// 'gpinter', calling R from within Stata.
//
// The method requires the following:
//     - A working version of Stata.
//     - A working version of R installed on the computer. If that is not
//       already the case, you should download and install R from the web
//       address <https://cran.r-project.org/mirrors.html>. Make sure to choose
//       a mirror near your location and to pick the right operating system.
//     - The packages 'haven' and 'gpinter' installed in R. To do so, launch R
//       and type in the commands:
//        > install.packages(c("haven", "devtools"))
//        > devtools::install_github("thomasblanchet/gpinter")
//     - The user-written Stata command 'rsource'. You can install it by typing:
//        > ssc install rsource
//
// If you need more help with this file, please contact
// <[email protected]>.

// -------------------------------------------------------------------------- //
// Preliminaries
// -------------------------------------------------------------------------- //

clear

// Set your working directory here
cd "~/GitHub/gpinter/inst/stata"

// If necessary, specify where R is installed on your computer. You only need
// to do it if the program cannot find R by default. On Linux and macOS, you
// should find it at the addresses "/usr/bin/r" or "/usr/local/bin/r". On
// Windows, you should locate the file "Rterm.exe".
// Type "help rsource" for more details.
// NOTE(review): file names are case-sensitive on Linux, where the R executable
// is presumably named "R" (upper case) -- check the path on your system before
// uncommenting the line below.

*global Rterm_path `"/usr/local/bin/r"'

// -------------------------------------------------------------------------- //
// Import tabulation example (US labor income, 2010)
// -------------------------------------------------------------------------- //

// One row per bracket. The overall average is only entered in the first row;
// it is left missing (".") in the other rows.
input average     p  threshold  bracketavg
        37208  0.10       4130       12643
            .  0.50      23686       43908
            .  0.90      76252      108329
            .  0.99     211861      471463
end

// -------------------------------------------------------------------------- //
// Save the tabulation as a Stata file
// -------------------------------------------------------------------------- //

// Using "saveold" is a useful precaution to make sure R will be able to read
// the file even if you have a very recent version of Stata
saveold "tabulation-input.dta", version(11) replace

// -------------------------------------------------------------------------- //
// Call R from Stata and run the interpolation in it
// -------------------------------------------------------------------------- //

// The R code runs from the next line down to the line reading "END_OF_R"
// (see the terminator() option); roptions() holds the options handed to R.
// Type "help rsource" for details.
// NOTE(review): the comments inside the R section below use Stata's "//"
// rather than R's "#" -- presumably they are removed before the code reaches
// R; confirm before changing them.
rsource, terminator(END_OF_R) roptions(--vanilla)

    // 'haven' is an R package for importing Stata '.dta' files
    library(haven)

    // 'gpinter' is the R package to perform generalized Pareto interpolation
    library(gpinter)

    // Import the Stata data into R
    data <- read_dta("tabulation-input.dta")

    // Perform interpolation. Only the first element of the 'average' column
    // is used, because the other rows of the input are missing.
    distribution <- tabulation_fit(
        p = data$p,
        thr = data$threshold,
        bracketavg = data$bracketavg,
        average = data$average[1]
    )

    // Percentiles to include in the output
    percentiles_output <- c(
        seq(0, 0.99, 0.01), // Every percentile
        seq(0.991, 0.999, 0.001), // Every 1/10 of a percentile in top 1%
        seq(0.9991, 0.9999, 0.0001), // Every 1/100 of a percentile in top 0.1%
        seq(0.99991, 0.99999, 0.00001) // Every 1/1000 of a percentile in top 0.01%
    )

    // Create a tabulation for these detailed percentiles
    tabulation <- generate_tabulation(distribution, percentiles_output)

    // You may keep only the columns you are interested in by removing some of
    // these rows. You can also rename the columns by changing the names on the
    // left of the equal sign.
    tabulation <- data.frame(
        p               = tabulation$fractile,
        threshold       = tabulation$threshold,
        top_share       = tabulation$top_share,
        bottom_share    = tabulation$bottom_share,
        bracket_share   = tabulation$bracket_share,
        top_average     = tabulation$top_average,
        bottom_average  = tabulation$bottom_average,
        bracket_average = tabulation$bracket_average,
        invpareto       = tabulation$invpareto
    )

    // Export the detailed tabulation
    write_dta(tabulation, "tabulation-output.dta")

END_OF_R

// -------------------------------------------------------------------------- //
// Import the results of the R program in Stata
// -------------------------------------------------------------------------- //

// Load the file written by the R code above, replacing the data in memory
use "tabulation-output.dta", clear

// You can now use the interpolation results in Stata...

results matching ""

    No results matching ""