用MySQL计算中位数最简单(希望不会太慢)的方法是什么?我已经使用AVG(x)来寻找平均值,但我很难找到一个简单的方法来计算中位数。现在,我将所有的行返回到PHP,进行排序,然后选择中间的行,但是肯定有一些简单的方法可以在一个MySQL查询中完成它。

示例数据:

id | val
--------
 1    4
 2    7
 3    2
 4    2
 5    9
 6    8
 7    3

对val排序得到2 2 3 4 7 8 9,因此中位数应该是4,而SELECT AVG(val) == 5。


当前回答

MariaDB / MySQL:

SELECT AVG(dd.val) as median_val
FROM (
SELECT d.val, @rownum:=@rownum+1 as `row_number`, @total_rows:=@rownum
  FROM data d, (SELECT @rownum:=0) r
  WHERE d.val is NOT NULL
  -- put some where clause here
  ORDER BY d.val
) as dd
WHERE dd.row_number IN ( FLOOR((@total_rows+1)/2), FLOOR((@total_rows+2)/2) );

Steve Cohen指出,在第一次传递之后,@rownum将包含总行数。这可用于确定中值,因此不需要第二次传递或连接。

此外,AVG(dd.val)和dd.row_number IN(…)用于在有偶数条记录时正确地产生中位数。推理:

SELECT FLOOR((3+1)/2),FLOOR((3+2)/2); -- when total_rows is 3, avg rows 2 and 2
SELECT FLOOR((4+1)/2),FLOOR((4+2)/2); -- when total_rows is 4, avg rows 2 and 3

最后,MariaDB 10.3.3+包含一个MEDIAN函数

其他回答

在某些情况下,中位数的计算如下:

“中位数”是数字列表中按值排序时的“中间”值。对于偶数集,中位数是两个中间值的平均值。 我为此创建了一个简单的代码:

$midValue = 0;
$rowCount = "SELECT count(*) as count {$from} {$where}";

$even = FALSE;
$offset = 1;
$medianRow = floor($rowCount / 2);
if ($rowCount % 2 == 0 && !empty($medianRow)) {
  $even = TRUE;
  $offset++;
  $medianRow--;
}

$medianValue = "SELECT column as median 
               {$fromClause} {$whereClause} 
               ORDER BY median 
               LIMIT {$medianRow},{$offset}";

$medianValDAO = db_query($medianValue);
while ($medianValDAO->fetch()) {
  if ($even) {
    $midValue = $midValue + $medianValDAO->median;
  }
  else {
    $median = $medianValDAO->median;
  }
}
if ($even) {
  $median = $midValue / 2;
}
return $median;

返回的$中位数将是所需的结果:-)

我发现接受的解决方案在我的MySQL安装上不起作用,返回一个空集,但这个查询在我测试的所有情况下都适用:

SELECT x.val from data x, data y
GROUP BY x.val
HAVING SUM(SIGN(1-SIGN(y.val-x.val)))/COUNT(*) > .5
LIMIT 1
set @r = 0;

select  
    case when mod(c,2)=0 then round(sum(lat_N),4)
    else round(sum(lat_N)/2,4) 
    end as Med  
from 
    (select lat_N, @r := @r+1, @r as id from station order by lat_N) A
    cross join
    (select (count(1)+1)/2 as c from station) B
where id >= floor(c) and id <=ceil(c)

MySQL文档中这一页的注释有以下建议:

-- (mostly) High Performance scaling MEDIAN function per group
-- Median defined in http://en.wikipedia.org/wiki/Median
--
-- by Peter Hlavac
-- 06.11.2008
--
-- Example Table:

DROP table if exists table_median;
CREATE TABLE table_median (id INTEGER(11),val INTEGER(11));
COMMIT;


INSERT INTO table_median (id, val) VALUES
(1, 7), (1, 4), (1, 5), (1, 1), (1, 8), (1, 3), (1, 6),
(2, 4),
(3, 5), (3, 2),
(4, 5), (4, 12), (4, 1), (4, 7);



-- Calculating the MEDIAN
SELECT @a := 0;
SELECT
id,
AVG(val) AS MEDIAN
FROM (
SELECT
id,
val
FROM (
SELECT
-- Create an index n for every id
@a := (@a + 1) mod o.c AS shifted_n,
IF(@a mod o.c=0, o.c, @a) AS n,
o.id,
o.val,
-- the number of elements for every id
o.c
FROM (
SELECT
t_o.id,
val,
c
FROM
table_median t_o INNER JOIN
(SELECT
id,
COUNT(1) AS c
FROM
table_median
GROUP BY
id
) t2
ON (t2.id = t_o.id)
ORDER BY
t_o.id,val
) o
) a
WHERE
IF(
-- if there is an even number of elements
-- take the lower and the upper median
-- and use AVG(lower,upper)
c MOD 2 = 0,
n = c DIV 2 OR n = (c DIV 2)+1,

-- if its an odd number of elements
-- take the first if its only one element
-- or take the one in the middle
IF(
c = 1,
n = 1,
n = c DIV 2 + 1
)
)
) a
GROUP BY
id;

-- Explanation:
-- The Statement creates a helper table like
--
-- n id val count
-- ----------------
-- 1, 1, 1, 7
-- 2, 1, 3, 7
-- 3, 1, 4, 7
-- 4, 1, 5, 7
-- 5, 1, 6, 7
-- 6, 1, 7, 7
-- 7, 1, 8, 7
--
-- 1, 2, 4, 1

-- 1, 3, 2, 2
-- 2, 3, 5, 2
--
-- 1, 4, 1, 4
-- 2, 4, 5, 4
-- 3, 4, 7, 4
-- 4, 4, 12, 4


-- from there we can select the n-th element on the position: count div 2 + 1 

归档完美中位数的单个查询:

SELECT 
COUNT(*) as total_rows, 
IF(count(*)%2 = 1, CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(val ORDER BY val SEPARATOR ','), ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL), ROUND((CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(val ORDER BY val SEPARATOR ','), ',', 50/100 * COUNT(*) + 1), ',', -1) AS DECIMAL) + CAST(SUBSTRING_INDEX(SUBSTRING_INDEX( GROUP_CONCAT(val ORDER BY val SEPARATOR ','), ',', 50/100 * COUNT(*)), ',', -1) AS DECIMAL)) / 2)) as median, 
AVG(val) as average 
FROM 
data