我想知道如何在ggplot上添加回归线方程和R^2。我的代码是:

library(ggplot2)

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)
p <- ggplot(data = df, aes(x = x, y = y)) +
            geom_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
            geom_point()
p

任何帮助都将不胜感激。


当前回答

我修改了Ramnath的帖子,a)使其更通用,以便它接受线性模型作为参数,而不是数据帧;b)更适当地显示负号。

lm_eqn = function(m) {

  l <- list(a = format(coef(m)[1], digits = 2),
      b = format(abs(coef(m)[2]), digits = 2),
      r2 = format(summary(m)$r.squared, digits = 3));

  if (coef(m)[2] >= 0)  {
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,l)
  } else {
    eq <- substitute(italic(y) == a - b %.% italic(x)*","~~italic(r)^2~"="~r2,l)    
  }

  as.character(as.expression(eq));                 
}

用法将更改为:

p1 = p + geom_text(aes(x = 25, y = 300, label = lm_eqn(lm(y ~ x, df))), parse = TRUE)

其他回答

我修改了stat_smooth和相关函数的源代码中的几行代码,以创建一个添加了拟合方程和R平方值的新函数。这也适用于小面图!

library(devtools)
source_gist("524eade46135f6348140")
df = data.frame(x = c(1:100))
df$y = 2 + 5 * df$x + rnorm(100, sd = 40)
df$class = rep(1:2,50)
ggplot(data = df, aes(x = x, y = y, label=y)) +
  stat_smooth_func(geom="text",method="lm",hjust=0,parse=TRUE) +
  geom_smooth(method="lm",se=FALSE) +
  geom_point() + facet_wrap(~class)

我使用了@Ramnath回答中的代码来格式化方程。stat_smooth_func函数不是很健壮,但是使用它应该不难。

https://gist.github.com/kdauria/524eade46135f6348140。如果出现错误,请尝试更新ggplot2。

这里有一个解决方案

# GET EQUATION AND R-SQUARED AS STRING
# SOURCE: https://groups.google.com/forum/#!topic/ggplot2/1TgH-kG5XMA

lm_eqn <- function(df){
    m <- lm(y ~ x, df);
    eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2, 
         list(a = format(unname(coef(m)[1]), digits = 2),
              b = format(unname(coef(m)[2]), digits = 2),
             r2 = format(summary(m)$r.squared, digits = 3)))
    as.character(as.expression(eq));
}

p1 <- p + geom_text(x = 25, y = 300, label = lm_eqn(df), parse = TRUE)

编辑。我找到了这个代码的来源。下面是ggplot2谷歌组中原始帖子的链接

真的很喜欢@Ramnath的解决方案。为了允许使用自定义回归公式(而不是固定为y和x作为字面变量名),并将p值添加到打印输出中(正如@Jerry T评论的那样),下面是mod:

lm_eqn <- function(df, y, x){
    formula = as.formula(sprintf('%s ~ %s', y, x))
    m <- lm(formula, data=df);
    # formating the values into a summary string to print out
    # ~ give some space, but equal size and comma need to be quoted
    eq <- substitute(italic(target) == a + b %.% italic(input)*","~~italic(r)^2~"="~r2*","~~p~"="~italic(pvalue), 
         list(target = y,
              input = x,
              a = format(as.vector(coef(m)[1]), digits = 2), 
              b = format(as.vector(coef(m)[2]), digits = 2), 
             r2 = format(summary(m)$r.squared, digits = 3),
             # getting the pvalue is painful
             pvalue = format(summary(m)$coefficients[2,'Pr(>|t|)'], digits=1)
            )
          )
    as.character(as.expression(eq));                 
}

geom_point() +
  ggrepel::geom_text_repel(label=rownames(mtcars)) +
  geom_text(x=3,y=300,label=lm_eqn(mtcars, 'hp','wt'),color='red',parse=T) +
  geom_smooth(method='lm')

不幸的是,这对facet_wrap或facet_grid不起作用。

类似于@zx8754和@kdauria的答案,只是使用了ggplot2和ggpubr。我更喜欢使用ggpubr,因为它不需要自定义函数,比如这个问题的顶部答案。

library(ggplot2)
library(ggpubr)

df <- data.frame(x = c(1:100))
df$y <- 2 + 3 * df$x + rnorm(100, sd = 40)

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label..)), # adds R^2 value
           r.accuracy = 0.01,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
                        label.x = 0, label.y = 400, size = 4)

也可以把p值加到上图中吗

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")), # adds R^2 and p-value
           r.accuracy = 0.01,
           p.accuracy = 0.001,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
                        label.x = 0, label.y = 400, size = 4)

当您有多个组时,也可以很好地使用facet_wrap()

df$group <- rep(1:2,50)

ggplot(data = df, aes(x = x, y = y)) +
  stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
  geom_point() +
  stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "~`,`~")),
           r.accuracy = 0.01,
           p.accuracy = 0.001,
           label.x = 0, label.y = 375, size = 4) +
  stat_regline_equation(aes(label = ..eq.label..),
                        label.x = 0, label.y = 400, size = 4) +
  theme_bw() +
  facet_wrap(~group)

另一种选择是创建一个自定义函数,使用dplyr和broom库生成方程:

get_formula <- function(model) {
  
  broom::tidy(model)[, 1:2] %>%
    mutate(sign = ifelse(sign(estimate) == 1, ' + ', ' - ')) %>% #coeff signs
    mutate_if(is.numeric, ~ abs(round(., 2))) %>% #for improving formatting
    mutate(a = ifelse(term == '(Intercept)', paste0('y ~ ', estimate), paste0(sign, estimate, ' * ', term))) %>%
    summarise(formula = paste(a, collapse = '')) %>%
    as.character
  
}

lm(y ~ x, data = df) -> model
get_formula(model)
#"y ~ 6.22 + 3.16 * x"

scales::percent(summary(model)$r.squared, accuracy = 0.01) -> r_squared

现在我们需要将文本添加到情节中:

p + 
  geom_text(x = 20, y = 300,
            label = get_formula(model),
            color = 'red') +
  geom_text(x = 20, y = 285,
            label = r_squared,
            color = 'blue')