我想知道如何在ggplot上添加回归线方程和R^2。我的代码是:

library(ggplot2)

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
p <- ggplot(data = df, aes(x = x, y = y)) +
            geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
            geom_point()
p

任何帮助都将不胜感激。


当前回答

下面是对每个人来说最简单的代码

注:显示皮尔森的Rho,而不是R^2。

library(ggplot2)
library(ggpubr)

df <- data.frame(x = c(1:100)
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
p <- ggplot(data = df, aes(x = x, y = y)) +
        geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
        geom_point()+
        stat_cor(label.y = 35)+ #this means at 35th unit in the y axis, the r squared and p value will be shown
        stat_regline_equation(label.y = 30) #this means at 30th unit regresion line equation will be shown

p

其他回答

类似于@zx8754和@kdauria的答案,只是使用了ggplot2和ggpubr。我更喜欢使用ggpubr,因为它不需要自定义函数,比如这个问题的顶部答案。

library(ggplot2)
library(ggpubr)

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label..)), # adds R^2 value
           r.accuracy = 0.01,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
                        label.x = 0, label.y = 400, size = 4)

也可以把p值加到上图中吗

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")), # adds R^2 and p-value
           r.accuracy = 0.01,
           p.accuracy = 0.001,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
                        label.x = 0, label.y = 400, size = 4)

当您有多个组时,也可以很好地使用facet_wrap()

df$group <- rep(1:2,50)

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")),
           r.accuracy = 0.01,
           p.accuracy = 0.001,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..),
                        label.x = 0, label.y = 400, size = 4) +
  theme_bw() +
  facet_wrap(~group)

我的包ggpmisc中的统计数据stat_poly_eq()使它可以根据线性模型拟合添加文本标签。

“ggpmisc”(>= 0.4.0)和“ggplot2”(>= 3.3.0)的答案已于2022-06-02更新。 在示例中,我使用stat_poly_line()而不是stat_smooth(),因为它的方法和公式的默认值与stat_poly_eq()相同。在所有代码示例中,我都省略了stat_poly_line()的附加参数,因为它们与添加标签的问题无关。

library(ggplot2)
library(ggpmisc)
#> Loading required package: ggpp
#> 
#> Attaching package: 'ggpp'
#> The following object is masked from 'package:ggplot2':
#> 
#>     annotate
# artificial data
df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
df$yy <- 2 + 3 * df$x + 0.1 * df$x^2 + rnorm(100, sd = 40)

# using default formula, label and methods
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq() +
  geom_point()

# assembling a single label with equation and R2
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(eq.label),
                                 after_stat(rr.label), sep = "*\", \"*"))) +
  geom_point()

# adding separate labels with equation and R2
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = after_stat(eq.label))) +
  stat_poly_eq(label.y = 0.9) +
  geom_point()

# regression through the origin
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line(formula = y ~ x + 0) +
  stat_poly_eq(formula = y ~ x + 0, aes(label = after_stat(eq.label))) +
  geom_point()

# fitting a polynomial
ggplot(data = df, aes(x = x, y = yy)) +
  stat_poly_line(formula = y ~ poly(x, 2, raw = TRUE)) +
  stat_poly_eq(formula = y ~ poly(x, 2, raw = TRUE),
               aes(label = after_stat(eq.label))) +
  geom_point()

# adding a hat as asked by @MYaseen208 and @elarry
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(eq.with.lhs = "italic(hat(y))~`=`~",
               aes(label = paste(after_stat(eq.label),
                                 after_stat(rr.label), sep = "*\", \"*"))) +
  geom_point()

# variable substitution as asked by @shabbychef
# same labels in equation and axes
ggplot(data = df, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(eq.with.lhs = "italic(h)~`=`~",
               eq.x.rhs = "~italic(z)",
               aes(label = after_stat(eq.label))) +
  labs(x = expression(italic(z)), y = expression(italic(h))) +
  geom_point()

# grouping as asked by @helen.h
dfg <- data.frame(x = c(1:100))
dfg$y <- 20 * c(0, 1) + 3 * df$x + rnorm(100, sd = 40)
dfg$group <- factor(rep(c("A", "B"), 50))

ggplot(data = dfg, aes(x = x, y = y, colour = group)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(eq.label),
                                 after_stat(rr.label), sep = "*\", \"*"))) +
  geom_point()

ggplot(data = dfg, aes(x = x, y = y, linetype = group, grp.label = group)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(grp.label), "*\": \"*",
                                 after_stat(eq.label), "*\", \"*",
                                 after_stat(rr.label), sep = ""))) +
  geom_point()

# a single fit with grouped data as asked by @Herman
ggplot(data = dfg, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(eq.label),
                                 after_stat(rr.label), sep = "*\", \"*"))) +
  geom_point(aes(colour = group))

# facets
ggplot(data = dfg, aes(x = x, y = y)) +
  stat_poly_line() +
  stat_poly_eq(aes(label = paste(after_stat(eq.label),
                                 after_stat(rr.label), sep = "*\", \"*"))) +
  geom_point() +
  facet_wrap(~group)

由reprex包于2022-06-02创建(v2.0.1)

这里有一个解决方案

# GET EQUATION AND R-SQUARED AS STRING
# SOURCE: https://groups.google.com/forum/#!topic/ggplot2/1TgH-kG5XMA

lm_eqn <- function(df){
    m <- lm(y ~ x, df);
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
         list(a = format(unname(coef(m)[1]), digits = 2),
              b = format(unname(coef(m)[2]), digits = 2),
             r2 = format(summary(m)$r.squared, digits = 3)))
    as.character(as.expression(eq));
}

p1 <- p + geom_text(x = 25, y = 300, label = lm_eqn(df), parse = TRUE)

编辑。我找到了这个代码的来源。下面是ggplot2谷歌组中原始帖子的链接

我修改了Ramnath的帖子,a)使其更通用,以便它接受线性模型作为参数,而不是数据帧;b)更适当地显示负号。

lm_eqn = function(m) {

  l <- list(a = format(coef(m)[1], digits = 2),
      b = format(abs(coef(m)[2]), digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3));

  if (coef(m)[2] >= 0)  {
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,l)
  } else {
    eq <- substitute(italic(y) == a - b %.% italic(x)*","~~italic(r)^2~"="~r2,l)    
  }

  as.character(as.expression(eq));                 
}

用法将更改为:

p1 = p + geom_text(aes(x = 25, y = 300, label = lm_eqn(lm(y ~ x, df))), parse = TRUE)

受到这个答案中提供的方程风格的启发,一个更通用的方法(多个预测器+ latex输出作为选项)可以是:

print_equation= function(model, latex= FALSE, ...){
    dots <- list(...)
    cc= model$coefficients
    var_sign= as.character(sign(cc[-1]))%>%gsub("1","",.)%>%gsub("-"," - ",.)
    var_sign[var_sign==""]= ' + '

    f_args_abs= f_args= dots
    f_args$x= cc
    f_args_abs$x= abs(cc)
    cc_= do.call(format, args= f_args)
    cc_abs= do.call(format, args= f_args_abs)
    pred_vars=
        cc_abs%>%
        paste(., x_vars, sep= star)%>%
        paste(var_sign,.)%>%paste(., collapse= "")

    if(latex){
        star= " \\cdot "
        y_var= strsplit(as.character(model$call$formula), "~")[[2]]%>%
            paste0("\\hat{",.,"_{i}}")
        x_vars= names(cc_)[-1]%>%paste0(.,"_{i}")
    }else{
        star= " * "
        y_var= strsplit(as.character(model$call$formula), "~")[[2]]        
        x_vars= names(cc_)[-1]
    }

    equ= paste(y_var,"=",cc_[1],pred_vars)
    if(latex){
        equ= paste0(equ," + \\hat{\\varepsilon_{i}} \\quad where \\quad \\varepsilon \\sim \\mathcal{N}(0,",
                    summary(MetamodelKdifEryth)$sigma,")")%>%paste0("$",.,"$")
    }
    cat(equ)
}

model参数需要一个lm对象,latex参数是一个布尔值,要求一个简单的字符或一个乳胶格式的方程,而…参数将其值传递给format函数。

我还添加了一个选项来输出它为latex,这样你就可以在rmarkdown中使用这个函数:


```{r echo=FALSE, results='asis'}
print_equation(model = lm_mod, latex = TRUE)
```

现在使用它:

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
df$z <- 8 + 3 * df$x + rnorm(100, sd = 40)
lm_mod= lm(y~x+z, data = df)

print_equation(model = lm_mod, latex = FALSE)

这段代码产生: Y = 11.3382963933174 + 2.5893419 * x + 0.1002227 * z

如果我们要求一个乳胶方程,将参数四舍五入为3位:

print_equation(model = lm_mod, latex = TRUE, digits= 3)

这个收益率: