实时时间序列数据中的峰值信号检测

更新:到目前为止表现最好的算法是这个。

这个问题探讨了在实时时间序列数据中检测突然峰值的稳健算法。

考虑以下示例数据:

这个数据的例子是Matlab格式的(但这个问题不是关于语言，而是关于算法):

p = [1 1 1.1 1 0.9 1 1 1.1 1 0.9 1 1.1 1 1 0.9 1 1 1.1 1 1 1 1 1.1 0.9 1 1.1 1 1 0.9, ...
     1 1.1 1 1 1.1 1 0.8 0.9 1 1.2 0.9 1 1 1.1 1.2 1 1.5 1 3 2 5 3 2 1 1 1 0.9 1 1, ... 
     3 2.6 4 3 3.2 2 1 1 0.8 4 4 2 2.5 1 1 1];

你可以清楚地看到有三个大峰和一些小峰。这个数据集是问题所涉及的时间序列数据集类的一个特定示例。这类数据集有两个一般特征:

有一种具有一般平均值的基本噪声有很大的“峰值”或“更高的数据点”明显偏离噪声。

让我们假设以下情况:

峰的宽度不能事先确定峰的高度明显偏离其他值算法实时更新(因此每个新数据点都会更新)

对于这种情况，需要构造一个触发信号的边值。但是，边界值不能是静态的，必须通过算法实时确定。

我的问题是:什么是实时计算这些阈值的好算法?有没有针对这种情况的特定算法?最著名的算法是什么?

健壮的算法或有用的见解都受到高度赞赏。(可以用任何语言回答:这是关于算法的)

当前回答

一个python/numpy的迭代版本的答案https://stackoverflow.com/a/22640362/6029703在这里。对于大数据(100000+)，此代码比计算平均和标准偏差的速度更快。

def peak_detection_smoothed_zscore_v2(x, lag, threshold, influence):
    '''
    iterative smoothed z-score algorithm
    Implementation of algorithm from https://stackoverflow.com/a/22640362/6029703
    '''
    import numpy as np
    labels = np.zeros(len(x))
    filtered_y = np.array(x)
    avg_filter = np.zeros(len(x))
    std_filter = np.zeros(len(x))
    var_filter = np.zeros(len(x))

    avg_filter[lag - 1] = np.mean(x[0:lag])
    std_filter[lag - 1] = np.std(x[0:lag])
    var_filter[lag - 1] = np.var(x[0:lag])
    for i in range(lag, len(x)):
        if abs(x[i] - avg_filter[i - 1]) > threshold * std_filter[i - 1]:
            if x[i] > avg_filter[i - 1]:
                labels[i] = 1
            else:
                labels[i] = -1
            filtered_y[i] = influence * x[i] + (1 - influence) * filtered_y[i - 1]
        else:
            labels[i] = 0
            filtered_y[i] = x[i]
        # update avg, var, std
        avg_filter[i] = avg_filter[i - 1] + 1. / lag * (filtered_y[i] - filtered_y[i - lag])
        var_filter[i] = var_filter[i - 1] + 1. / lag * ((filtered_y[i] - avg_filter[i - 1]) ** 2 - (
            filtered_y[i - lag] - avg_filter[i - 1]) ** 2 - (filtered_y[i] - filtered_y[i - lag]) ** 2 / lag)
        std_filter[i] = np.sqrt(var_filter[i])

    return dict(signals=labels,
                avgFilter=avg_filter,
                stdFilter=std_filter)

2018-09-09 15:40:00

其他回答

原文的附录1:Matlab和R翻译

Matlab代码

function [signals,avgFilter,stdFilter] = ThresholdingAlgo(y,lag,threshold,influence)
% Initialise signal results
signals = zeros(length(y),1);
% Initialise filtered series
filteredY = y(1:lag+1);
% Initialise filters
avgFilter(lag+1,1) = mean(y(1:lag+1));
stdFilter(lag+1,1) = std(y(1:lag+1));
% Loop over all datapoints y(lag+2),...,y(t)
for i=lag+2:length(y)
    % If new value is a specified number of deviations away
    if abs(y(i)-avgFilter(i-1)) > threshold*stdFilter(i-1)
        if y(i) > avgFilter(i-1)
            % Positive signal
            signals(i) = 1;
        else
            % Negative signal
            signals(i) = -1;
        end
        % Make influence lower
        filteredY(i) = influence*y(i)+(1-influence)*filteredY(i-1);
    else
        % No signal
        signals(i) = 0;
        filteredY(i) = y(i);
    end
    % Adjust the filters
    avgFilter(i) = mean(filteredY(i-lag:i));
    stdFilter(i) = std(filteredY(i-lag:i));
end
% Done, now return results
end

例子:

% Data
y = [1 1 1.1 1 0.9 1 1 1.1 1 0.9 1 1.1 1 1 0.9 1 1 1.1 1 1,...
    1 1 1.1 0.9 1 1.1 1 1 0.9 1 1.1 1 1 1.1 1 0.8 0.9 1 1.2 0.9 1,...
    1 1.1 1.2 1 1.5 1 3 2 5 3 2 1 1 1 0.9 1,...
    1 3 2.6 4 3 3.2 2 1 1 0.8 4 4 2 2.5 1 1 1];

% Settings
lag = 30;
threshold = 5;
influence = 0;

% Get results
[signals,avg,dev] = ThresholdingAlgo(y,lag,threshold,influence);

figure; subplot(2,1,1); hold on;
x = 1:length(y); ix = lag+1:length(y);
area(x(ix),avg(ix)+threshold*dev(ix),'FaceColor',[0.9 0.9 0.9],'EdgeColor','none');
area(x(ix),avg(ix)-threshold*dev(ix),'FaceColor',[1 1 1],'EdgeColor','none');
plot(x(ix),avg(ix),'LineWidth',1,'Color','cyan','LineWidth',1.5);
plot(x(ix),avg(ix)+threshold*dev(ix),'LineWidth',1,'Color','green','LineWidth',1.5);
plot(x(ix),avg(ix)-threshold*dev(ix),'LineWidth',1,'Color','green','LineWidth',1.5);
plot(1:length(y),y,'b');
subplot(2,1,2);
stairs(signals,'r','LineWidth',1.5); ylim([-1.5 1.5]);

R代码

ThresholdingAlgo <- function(y,lag,threshold,influence) {
  signals <- rep(0,length(y))
  filteredY <- y[0:lag]
  avgFilter <- NULL
  stdFilter <- NULL
  avgFilter[lag] <- mean(y[0:lag], na.rm=TRUE)
  stdFilter[lag] <- sd(y[0:lag], na.rm=TRUE)
  for (i in (lag+1):length(y)){
    if (abs(y[i]-avgFilter[i-1]) > threshold*stdFilter[i-1]) {
      if (y[i] > avgFilter[i-1]) {
        signals[i] <- 1;
      } else {
        signals[i] <- -1;
      }
      filteredY[i] <- influence*y[i]+(1-influence)*filteredY[i-1]
    } else {
      signals[i] <- 0
      filteredY[i] <- y[i]
    }
    avgFilter[i] <- mean(filteredY[(i-lag):i], na.rm=TRUE)
    stdFilter[i] <- sd(filteredY[(i-lag):i], na.rm=TRUE)
  }
  return(list("signals"=signals,"avgFilter"=avgFilter,"stdFilter"=stdFilter))
}

例子:

# Data
y <- c(1,1,1.1,1,0.9,1,1,1.1,1,0.9,1,1.1,1,1,0.9,1,1,1.1,1,1,1,1,1.1,0.9,1,1.1,1,1,0.9,
       1,1.1,1,1,1.1,1,0.8,0.9,1,1.2,0.9,1,1,1.1,1.2,1,1.5,1,3,2,5,3,2,1,1,1,0.9,1,1,3,
       2.6,4,3,3.2,2,1,1,0.8,4,4,2,2.5,1,1,1)

lag       <- 30
threshold <- 5
influence <- 0

# Run algo with lag = 30, threshold = 5, influence = 0
result <- ThresholdingAlgo(y,lag,threshold,influence)

# Plot result
par(mfrow = c(2,1),oma = c(2,2,0,0) + 0.1,mar = c(0,0,2,1) + 0.2)
plot(1:length(y),y,type="l",ylab="",xlab="") 
lines(1:length(y),result$avgFilter,type="l",col="cyan",lwd=2)
lines(1:length(y),result$avgFilter+threshold*result$stdFilter,type="l",col="green",lwd=2)
lines(1:length(y),result$avgFilter-threshold*result$stdFilter,type="l",col="green",lwd=2)
plot(result$signals,type="S",col="red",ylab="",xlab="",ylim=c(-1.5,1.5),lwd=2)

这段代码(两种语言)将为原始问题的数据产生以下结果:

附录2原答案:Matlab演示代码

(点击创建数据)

function [] = RobustThresholdingDemo()

%% SPECIFICATIONS
lag         = 5;       % lag for the smoothing
threshold   = 3.5;     % number of st.dev. away from the mean to signal
influence   = 0.3;     % when signal: how much influence for new data? (between 0 and 1)
                       % 1 is normal influence, 0.5 is half      
%% START DEMO
DemoScreen(30,lag,threshold,influence);

end

function [signals,avgFilter,stdFilter] = ThresholdingAlgo(y,lag,threshold,influence)
signals = zeros(length(y),1);
filteredY = y(1:lag+1);
avgFilter(lag+1,1) = mean(y(1:lag+1));
stdFilter(lag+1,1) = std(y(1:lag+1));
for i=lag+2:length(y)
    if abs(y(i)-avgFilter(i-1)) > threshold*stdFilter(i-1)
        if y(i) > avgFilter(i-1)
            signals(i) = 1;
        else
            signals(i) = -1;
        end
        filteredY(i) = influence*y(i)+(1-influence)*filteredY(i-1);
    else
        signals(i) = 0;
        filteredY(i) = y(i);
    end
    avgFilter(i) = mean(filteredY(i-lag:i));
    stdFilter(i) = std(filteredY(i-lag:i));
end
end

% Demo screen function
function [] = DemoScreen(n,lag,threshold,influence)
figure('Position',[200 100,1000,500]);
subplot(2,1,1);
title(sprintf(['Draw data points (%.0f max)      [settings: lag = %.0f, '...
    'threshold = %.2f, influence = %.2f]'],n,lag,threshold,influence));
ylim([0 5]); xlim([0 50]);
H = gca; subplot(2,1,1);
set(H, 'YLimMode', 'manual'); set(H, 'XLimMode', 'manual');
set(H, 'YLim', get(H,'YLim')); set(H, 'XLim', get(H,'XLim'));
xg = []; yg = [];
for i=1:n
    try
        [xi,yi] = ginput(1);
    catch
        return;
    end
    xg = [xg xi]; yg = [yg yi];
    if i == 1
        subplot(2,1,1); hold on;
        plot(H, xg(i),yg(i),'r.'); 
        text(xg(i),yg(i),num2str(i),'FontSize',7);
    end
    if length(xg) > lag
        [signals,avg,dev] = ...
            ThresholdingAlgo(yg,lag,threshold,influence);
        area(xg(lag+1:end),avg(lag+1:end)+threshold*dev(lag+1:end),...
            'FaceColor',[0.9 0.9 0.9],'EdgeColor','none');
        area(xg(lag+1:end),avg(lag+1:end)-threshold*dev(lag+1:end),...
            'FaceColor',[1 1 1],'EdgeColor','none');
        plot(xg(lag+1:end),avg(lag+1:end),'LineWidth',1,'Color','cyan');
        plot(xg(lag+1:end),avg(lag+1:end)+threshold*dev(lag+1:end),...
            'LineWidth',1,'Color','green');
        plot(xg(lag+1:end),avg(lag+1:end)-threshold*dev(lag+1:end),...
            'LineWidth',1,'Color','green');
        subplot(2,1,2); hold on; title('Signal output');
        stairs(xg(lag+1:end),signals(lag+1:end),'LineWidth',2,'Color','blue');
        ylim([-2 2]); xlim([0 50]); hold off;
    end
    subplot(2,1,1); hold on;
    for j=2:i
        plot(xg([j-1:j]),yg([j-1:j]),'r'); plot(H,xg(j),yg(j),'r.');
        text(xg(j),yg(j),num2str(j),'FontSize',7);
    end
end
end

2019-02-03 20:37:54

一种方法是根据以下观察来检测峰:

时间t是一个峰值(y (t) > y (t - 1)) & & ((t) > y (t + 1))

它通过等待上升趋势结束来避免误报。它并不完全是“实时”的，因为它会比峰值差一个dt。灵敏度可以通过要求比较的裕度来控制。在噪声检测和时延检测之间存在一种折衷。您可以通过添加更多参数来丰富模型:

峰如果y (y (t) - (t-dt) > m) && (y (t) - y (t + dt) > m)

dt和m是控制灵敏度和延时的参数

这是你用上述算法得到的结果:

下面是在python中重现图的代码:

import numpy as np
import matplotlib.pyplot as plt
input = np.array([ 1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1. ,  1.1,  1. ,  0.8,  0.9,
    1. ,  1.2,  0.9,  1. ,  1. ,  1.1,  1.2,  1. ,  1.5,  1. ,  3. ,
    2. ,  5. ,  3. ,  2. ,  1. ,  1. ,  1. ,  0.9,  1. ,  1. ,  3. ,
    2.6,  4. ,  3. ,  3.2,  2. ,  1. ,  1. ,  1. ,  1. ,  1. ])
signal = (input > np.roll(input,1)) & (input > np.roll(input,-1))
plt.plot(input)
plt.plot(signal.nonzero()[0], input[signal], 'ro')
plt.show()

通过设置m = 0.5，你可以得到一个更清晰的信号，只有一个假阳性:

2014-03-22 21:23:53

鲁棒峰值检测算法(使用z-scores)

I came up with an algorithm that works very well for these types of datasets. It is based on the principle of dispersion: if a new datapoint is a given x number of standard deviations away from some moving mean, the algorithm signals (also called z-score). The algorithm is very robust because it constructs a separate moving mean and deviation, such that signals do not corrupt the threshold. Future signals are therefore identified with approximately the same accuracy, regardless of the amount of previous signals. The algorithm takes 3 inputs: lag = the lag of the moving window, threshold = the z-score at which the algorithm signals and influence = the influence (between 0 and 1) of new signals on the mean and standard deviation. For example, a lag of 5 will use the last 5 observations to smooth the data. A threshold of 3.5 will signal if a datapoint is 3.5 standard deviations away from the moving mean. And an influence of 0.5 gives signals half of the influence that normal datapoints have. Likewise, an influence of 0 ignores signals completely for recalculating the new threshold. An influence of 0 is therefore the most robust option (but assumes stationarity); putting the influence option at 1 is least robust. For non-stationary data, the influence option should therefore be put somewhere between 0 and 1.

其工作原理如下:

伪代码

# Let y be a vector of timeseries data of at least length lag+2
# Let mean() be a function that calculates the mean
# Let std() be a function that calculates the standard deviaton
# Let absolute() be the absolute value function

# Settings (these are examples: choose what is best for your data!)
set lag to 5;          # average and std. are based on past 5 observations
set threshold to 3.5;  # signal when data point is 3.5 std. away from average
set influence to 0.5;  # between 0 (no influence) and 1 (full influence)

# Initialize variables
set signals to vector 0,...,0 of length of y;   # Initialize signal results
set filteredY to y(1),...,y(lag)                # Initialize filtered series
set avgFilter to null;                          # Initialize average filter
set stdFilter to null;                          # Initialize std. filter
set avgFilter(lag) to mean(y(1),...,y(lag));    # Initialize first value average
set stdFilter(lag) to std(y(1),...,y(lag));     # Initialize first value std.

for i=lag+1,...,t do
  if absolute(y(i) - avgFilter(i-1)) > threshold*stdFilter(i-1) then
    if y(i) > avgFilter(i-1) then
      set signals(i) to +1;                     # Positive signal
    else
      set signals(i) to -1;                     # Negative signal
    end
    set filteredY(i) to influence*y(i) + (1-influence)*filteredY(i-1);
  else
    set signals(i) to 0;                        # No signal
    set filteredY(i) to y(i);
  end
  set avgFilter(i) to mean(filteredY(i-lag+1),...,filteredY(i));
  set stdFilter(i) to std(filteredY(i-lag+1),...,filteredY(i));
end

下面是为数据选择良好参数的经验法则。

Demo

这个演示的Matlab代码可以在这里找到。要使用演示，只需运行它并单击上面的图表自己创建一个时间序列。算法在绘制滞后观测数后开始工作。

结果

对于原始问题，当使用以下设置时，该算法将给出以下输出:滞后= 30，阈值= 5，影响= 0:

在不同编程语言中的实现:

Matlab (me) R (me) Golang (Xeoncross) Golang [efficient version] (Micah Parks) Python (R Kiselev) Python [efficient version] (delica) Swift (me) Groovy (JoshuaCWebDeveloper) C++ [interactive parameters] (Jason C) C++ (Animesh Pandey) Rust (swizard) Scala (Mike Roberts) Kotlin (leoderprofi) Ruby (Kimmo Lehto) Fortran [for resonance detection] (THo) Julia (Matt Camp) C# (Ocean Airdrop) C (DavidC) Java (takanuva15) JavaScript (Dirk Lüsebrink) TypeScript (Jerry Gamble) Perl (Alen) PHP (radhoo) PHP (gtjamesa) Dart (Sga)

配置算法的经验法则

lag: the lag parameter determines how much your data will be smoothed and how adaptive the algorithm is to changes in the long-term average of the data. The more stationary your data is, the more lags you should include (this should improve the robustness of the algorithm). If your data contains time-varying trends, you should consider how quickly you want the algorithm to adapt to these trends. I.e., if you put lag at 10, it takes 10 'periods' before the algorithm's treshold is adjusted to any systematic changes in the long-term average. So choose the lag parameter based on the trending behavior of your data and how adaptive you want the algorithm to be.

influence: this parameter determines the influence of signals on the algorithm's detection threshold. If put at 0, signals have no influence on the threshold, such that future signals are detected based on a threshold that is calculated with a mean and standard deviation that is not influenced by past signals. If put at 0.5, signals have half the influence of normal data points. Another way to think about this is that if you put the influence at 0, you implicitly assume stationarity (i.e. no matter how many signals there are, you always expect the time series to return to the same average over the long term). If this is not the case, you should put the influence parameter somewhere between 0 and 1, depending on the extent to which signals can systematically influence the time-varying trend of the data. E.g., if signals lead to a structural break of the long-term average of the time series, the influence parameter should be put high (close to 1) so the threshold can react to structural breaks quickly.

threshold: the threshold parameter is the number of standard deviations from the moving mean above which the algorithm will classify a new datapoint as being a signal. For example, if a new datapoint is 4.0 standard deviations above the moving mean and the threshold parameter is set as 3.5, the algorithm will identify the datapoint as a signal. This parameter should be set based on how many signals you expect. For example, if your data is normally distributed, a threshold (or: z-score) of 3.5 corresponds to a signaling probability of 0.00047 (from this table), which implies that you expect a signal once every 2128 datapoints (1/0.00047). The threshold therefore directly influences how sensitive the algorithm is and thereby also determines how often the algorithm signals. Examine your own data and choose a sensible threshold that makes the algorithm signal when you want it to (some trial-and-error might be needed here to get to a good threshold for your purpose).

警告:上面的代码每次运行时都会遍历所有的数据点。在实现这段代码时，请确保将信号的计算拆分为一个单独的函数(没有循环)。然后当一个新的数据点到达时，更新filteredY, avgFilter和stdFilter一次。不要每次有新的数据点时都重新计算所有数据的信号(就像上面的例子一样)，这在实时应用程序中是非常低效和缓慢的。

其他修改算法的方法(为了潜在的改进)有:

使用中位数而不是平均值使用稳健的尺度测量，如中位数绝对偏差(MAD)，而不是标准偏差使用信号裕度，这样信号就不会频繁切换更改影响参数的工作方式区别对待上下信号(不对称处理) 创建一个单独的影响参数的平均值和标准(在这个Swift翻译)

(已知)学术引用此StackOverflow的答案:

Cai, Y., Wang, X., Joos, G., & Kamwa, I. (2022). An Online Data-Driven Method to Locate Forced Oscillation Sources from Power Plants Based on Sparse Identification of Nonlinear Dynamics (SINDy). IEEE Transactions on Power Systems. Yang, S., Yim, J., Kim, J., & Shin, H. V. (2022). CatchLive: Real-time Summarization of Live Streams with Stream Content and Interaction Data. CHI Conference on Human Factors in Computing Systems, 1-20. Feng, D., Tan, Z., Engwirda, D., Liao, C., Xu, D., Bisht, G., ... & Leung, R. (2022). Investigating coastal backwater effects and flooding in the coastal zone using a global river transport model on an unstructured mesh. Hydrology and Earth System Sciences Discussions, 1-31 [preprint]. Link, J., Perst, T., Stoeve, M., & Eskofier, B. M. (2022). Wearable sensors for activity recognition in ultimate frisbee using convolutional neural networks and transfer learning. Sensors, 22(7), 2560. Romeiro, J. M. N., Torres, F. T. P., & Pirotti, F. (2021). Evaluation of Effect of Prescribed Fires Using Spectral Indices and SAR Data. Bollettino della società italiana di fotogrammetria e topografia, (2), 36-56. Moore, J., Goffin, P., Wiese, J., & Meyer, M. (2021). An Interview Method for Engaging Personal Data. Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies, 5(4), 1-28. Rykov, Y., Thach, T. Q., Bojic, I., Christopoulos, G., & Car, J. (2021). Digital Biomarkers for Depression Screening With Wearable Devices: Cross-sectional Study With Machine Learning Modeling. JMIR mHealth and uHealth, 9(10), e24872. Hong, Y., Xin, Y., Martin, H., Bucher, D., & Raubal, M. (2021). A Clustering-Based Framework for Individual Travel Behaviour Change Detection. In 11th International Conference on Geographic Information Science (GIScience 2021)-Part II. Grammenos, A., Kalyvianaki, E., & Pietzuch, P. (2021). Pronto: Federated Task Scheduling. arXiv preprint arXiv:2104.13429. Courtial, N. (2020). Fusion d’images multimodales pour l’assistance de procédures d’électrophysiologie cardiaque. Doctoral dissertation, Université Rennes. Beckman, W. F., Jiménez, M. Á. L., Moerland, P. D., Westerhoff, H. V., & Verschure, P. J. (2020). 4sUDRB-sequencing for genome-wide transcription bursting quantification in breast cancer cells. bioRxiv. Olkhovskiy, M., Müllerová, E., & Martínek, P. (2020). Impulse signals classification using one dimensional convolutional neural network. Journal of Electrical Engineering, 71(6), 397-405. Gao, S., & Calderon, D. P. (2020). Robust alternative to the righting reflex to assess arousal in rodents. Scientific reports, 10(1), 1-11. Chen, G. & Dong, W. (2020). Reactive Jamming and Attack Mitigation over Cross-Technology Communication Links. ACM Transactions on Sensor Networks, 17(1). Takahashi, R., Fukumoto, M., Han, C., Sasatani, T., Narusue, Y., & Kawahara, Y. (2020). TelemetRing: A Batteryless and Wireless Ring-shaped Keyboard using Passive Inductive Telemetry. In Proceedings of the 33rd Annual ACM Symposium on User Interface Software and Technology (pp. 1161-1168). Negus, M. J., Moore, M. R., Oliver, J. M., Cimpeanu, R. (2020). Droplet impact onto a spring-supported plate: analysis and simulations. Journal of Engineering Mathematics, 128(3). Yin, C. (2020). Dinucleotide repeats in coronavirus SARS-CoV-2 genome: evolutionary implications. ArXiv e-print, accessible from: https://arxiv.org/pdf/2006.00280.pdf Esnaola-Gonzalez, I., Gómez-Omella, M., Ferreiro, S., Fernandez, I., Lázaro, I., & García, E. (2020). An IoT Platform Towards the Enhancement of Poultry Production Chains. Sensors, 20(6), 1549. Gao, S., & Calderon, D. P. (2020). Continuous regimens of cortico-motor integration calibrate levels of arousal during emergence from anesthesia. bioRxiv. Cloud, B., Tarien, B., Liu, A., Shedd, T., Lin, X., Hubbard, M., ... & Moore, J. K. (2019). Adaptive smartphone-based sensor fusion for estimating competitive rowing kinematic metrics. PloS one, 14(12). Ceyssens, F., Carmona, M. B., Kil, D., Deprez, M., Tooten, E., Nuttin, B., ... & Puers, R. (2019). Chronic neural recording with probes of subcellular cross-section using 0.06 mm² dissolving microneedles as insertion device. Sensors and Actuators B: Chemical, 284, pp. 369-376. Dons, E., Laeremans, M., Orjuela, J. P., Avila-Palencia, I., de Nazelle, A., Nieuwenhuijsen, M., ... & Nawrot, T. (2019). Transport Most Likely to Cause Air Pollution Peak Exposures in Everyday Life: Evidence from over 2000 Days of Personal Monitoring. Atmospheric Environment, 213, 424-432. Schaible B.J., Snook K.R., Yin J., et al. (2019). Twitter conversations and English news media reports on poliomyelitis in five different countries, January 2014 to April 2015. The Permanente Journal, 23, 18-181. Lima, B. (2019). Object Surface Exploration Using a Tactile-Enabled Robotic Fingertip (Doctoral dissertation, Université d'Ottawa/University of Ottawa). Lima, B. M. R., Ramos, L. C. S., de Oliveira, T. E. A., da Fonseca, V. P., & Petriu, E. M. (2019). Heart Rate Detection Using a Multimodal Tactile Sensor and a Z-score Based Peak Detection Algorithm. CMBES Proceedings, 42. Lima, B. M. R., de Oliveira, T. E. A., da Fonseca, V. P., Zhu, Q., Goubran, M., Groza, V. Z., & Petriu, E. M. (2019, June). Heart Rate Detection Using a Miniaturized Multimodal Tactile Sensor. In 2019 IEEE International Symposium on Medical Measurements and Applications (MeMeA) (pp. 1-6). IEEE. Ting, C., Field, R., Quach, T., Bauer, T. (2019). Generalized Boundary Detection Using Compression-based Analytics. ICASSP 2019 - 2019 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), Brighton, United Kingdom, pp. 3522-3526. Carrier, E. E. (2019). Exploiting compression in solving discretized linear systems. Doctoral dissertation, University of Illinois at Urbana-Champaign. Khandakar, A., Chowdhury, M. E., Ahmed, R., Dhib, A., Mohammed, M., Al-Emadi, N. A., & Michelson, D. (2019). Portable system for monitoring and controlling driver behavior and the use of a mobile phone while driving. Sensors, 19(7), 1563. Baskozos, G., Dawes, J. M., Austin, J. S., Antunes-Martins, A., McDermott, L., Clark, A. J., ... & Orengo, C. (2019). Comprehensive analysis of long noncoding RNA expression in dorsal root ganglion reveals cell-type specificity and dysregulation after nerve injury. Pain, 160(2), 463. Cloud, B., Tarien, B., Crawford, R., & Moore, J. (2018). Adaptive smartphone-based sensor fusion for estimating competitive rowing kinematic metrics. engrXiv Preprints. Zajdel, T. J. (2018). Electronic Interfaces for Bacteria-Based Biosensing. Doctoral dissertation, UC Berkeley. Perkins, P., Heber, S. (2018). Identification of Ribosome Pause Sites Using a Z-Score Based Peak Detection Algorithm. IEEE 8th International Conference on Computational Advances in Bio and Medical Sciences (ICCABS), ISBN: 978-1-5386-8520-4. Moore, J., Goffin, P., Meyer, M., Lundrigan, P., Patwari, N., Sward, K., & Wiese, J. (2018). Managing In-home Environments through Sensing, Annotating, and Visualizing Air Quality Data. Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies, 2(3), 128. Lo, O., Buchanan, W. J., Griffiths, P., and Macfarlane, R. (2018), Distance Measurement Methods for Improved Insider Threat Detection, Security and Communication Networks, Vol. 2018, Article ID 5906368. Apurupa, N. V., Singh, P., Chakravarthy, S., & Buduru, A. B. (2018). A critical study of power consumption patterns in Indian Apartments. Doctoral dissertation, IIIT-Delhi. Scirea, M. (2017). Affective Music Generation and its effect on player experience. Doctoral dissertation, IT University of Copenhagen, Digital Design. Scirea, M., Eklund, P., Togelius, J., & Risi, S. (2017). Primal-improv: Towards co-evolutionary musical improvisation. Computer Science and Electronic Engineering (CEEC), 2017 (pp. 172-177). IEEE. Catalbas, M. C., Cegovnik, T., Sodnik, J. and Gulten, A. (2017). Driver fatigue detection based on saccadic eye movements, 10th International Conference on Electrical and Electronics Engineering (ELECO), pp. 913-917.

其他工作使用的算法从这个答案

Bergamini, E. and E. Mourlon-Druol (2021). Talking about Europe: exploring 70 years of news archives. Working Paper 04/2021, Bruegel. Cox, G. (2020). Peak Detection in a Measured Signal. Online article on https://www.baeldung.com/cs/signal-peak-detection. Raimundo, D. W. (2020). SwitP: Mobile Application for Real-Time Swimming Analysis.. Semester Thesis, ETH Zürich. Bernardi, D. (2019). A feasibility study on pairing a smartwatch and a mobile device through multi-modal gestures. Master thesis, Aalto University. Lemmens, E. (2018). Outlier detection in event logs by using statistical methods, Master thesis, University of Eindhoven. Willems, P. (2017). Mood controlled affective ambiences for the elderly, Master thesis, University of Twente. Ciocirdel, G. D. and Varga, M. (2016). Election Prediction Based on Wikipedia Pageviews. Project paper, Vrije Universiteit Amsterdam.

算法的其他应用从这个答案

Avo审计dbt包。Avo公司(下一代分析治理)。用OpenBCI系统合成语音，SarahK01。 Python包:机器学习金融实验室，基于De Prado, m.l.(2018)的工作。金融机器学习的进展。John Wiley & Sons。 Adafruit CircuitPlayground Library, Adafruit board (Adafruit Industries) 步长跟踪算法，Android App (jeeshnair) R包:《动物追踪者》(乔·钱皮恩、西娅·苏基安托)

链接到其他峰值检测算法

噪声正弦时间序列中的实时峰值检测

如何引用该算法:

Brakel, J.P.G. van(2014)。使用z分数的鲁棒峰值检测算法。堆栈溢出。下载地址:https://stackoverflow.com/questions/22583391/peak-signal-detection-in-realtime-timeseries-data/22640362#22640362(版本:2020-11-08)。

助理 @misc{brakel2014，作者= {Brakel, J.P.G van}，标题={使用z-scores的鲁棒峰值检测算法}，url = {https://stackoverflow.com/questions/22583391/peak-signal-detection-in-realtime-timeseries-data/22640362#22640362}，语言= {en}，年份= {2014}，urldate ={2022-04-12}，期刊= {Stack Overflow}， howpublished = {https://stackoverflow.com/questions/22583391/peak-signal-detection-in-realtime-timeseries-data/22640362#22640362}}

如果你在某个地方使用这个功能，请使用上面的参考资料来感谢我。如果你对算法有任何问题，请在下面的评论中发表，或在LinkedIn上与我联系。

2014-03-25 16:16:01

我为Jean-Paul最受欢迎的答案写了一个Go包。它假设y值的类型为float64。

github.com/MicahParks/peakdetect

下面的示例使用了这个包，并基于上面提到的流行答案中的R示例。它在编译时没有任何依赖关系，试图保持较低的内存占用，并且在有新数据点进入时不重新处理过去的点。该项目有100%的测试覆盖率，主要来自上述R示例的输入和输出。但是，如果有人发现任何错误，请打开一个GitHub问题。

编辑:我对v0.0.5进行了性能改进，似乎快了10倍!它使用Welford的方法进行初始化，并使用类似的方法计算滞后期(滑动窗口)的平均值和总体标准偏差。特别感谢另一个帖子的回答:https://stackoverflow.com/a/14638138/14797322

下面是基于R例子的Golang例子:

package main

import (
    "fmt"
    "log"

    "github.com/MicahParks/peakdetect"
)

// This example is the equivalent of the R example from the algorithm's author.
// https://stackoverflow.com/a/54507329/14797322
func main() {
    data := []float64{1, 1, 1.1, 1, 0.9, 1, 1, 1.1, 1, 0.9, 1, 1.1, 1, 1, 0.9, 1, 1, 1.1, 1, 1, 1, 1, 1.1, 0.9, 1, 1.1, 1, 1, 0.9, 1, 1.1, 1, 1, 1.1, 1, 0.8, 0.9, 1, 1.2, 0.9, 1, 1, 1.1, 1.2, 1, 1.5, 1, 3, 2, 5, 3, 2, 1, 1, 1, 0.9, 1, 1, 3, 2.6, 4, 3, 3.2, 2, 1, 1, 0.8, 4, 4, 2, 2.5, 1, 1, 1}

    // Algorithm configuration from example.
    const (
        lag       = 30
        threshold = 5
        influence = 0
    )

    // Create then initialize the peak detector.
    detector := peakdetect.NewPeakDetector()
    err := detector.Initialize(influence, threshold, data[:lag]) // The length of the initial values is the lag.
    if err != nil {
        log.Fatalf("Failed to initialize peak detector.\nError: %s", err)
    }

    // Start processing new data points and determine what signal, if any they produce.
    //
    // This method, .Next(), is best for when data is being processed in a stream, but this simply iterates over a slice.
    nextDataPoints := data[lag:]
    for i, newPoint := range nextDataPoints {
        signal := detector.Next(newPoint)
        var signalType string
        switch signal {
        case peakdetect.SignalNegative:
            signalType = "negative"
        case peakdetect.SignalNeutral:
            signalType = "neutral"
        case peakdetect.SignalPositive:
            signalType = "positive"
        }

        println(fmt.Sprintf("Data point at index %d has the signal: %s", i+lag, signalType))
    }

    // This method, .NextBatch(), is a helper function for processing many data points at once. It's returned slice
    // should produce the same signal outputs as the loop above.
    signals := detector.NextBatch(nextDataPoints)
    println(fmt.Sprintf("1:1 ratio of batch inputs to signal outputs: %t", len(signals) == len(nextDataPoints)))
}

2022-06-04 18:09:04

在信号处理中，峰值检测通常采用小波变换。基本上就是对时间序列数据进行离散小波变换。返回的细节系数中的过零将对应于时间序列信号中的峰值。你会在不同的细节系数水平上检测到不同的峰值振幅，这给了你多层次的分辨率。

2014-03-31 20:54:49

实时时间序列数据中的峰值信号检测

推荐文章

最新文章

标签