[Spike] mod-oai-pmh performance metrics

MODOAIPMH-349 - Getting issue details... STATUS  - [SPIKE]: Add metrics to mod-oai-pmh 

RolePersonComments
Solution Architect
Java LeadDone
UI Lead
Product Owner


Requirements

The metrics should be posted to logs. It should be done once per minute and only if there is a request during this minute. The needed metrics are:

  • The time, that is taken by the download instances call.
  • The average time of response from inventory-storage.
  • The average time of response from SRS.
  • The average time of marshaling of object to XML response.
  • The average time of the whole time of processing of one instance (from the instances table).

PoC

Operations should be described as enum:

MetricOperation
package org.folio.oaipmh.service;

public enum MetricOperation {
  PARSE_XML,
  SEND_REQUEST;
}


A separate service should be responsible for metrics logging. Logs should be saved in separate file. The service calculates the start and end times of the operation in order to log this information.

MetricsCollectingService
package org.folio.oaipmh.service;

import lombok.*;
import lombok.extern.log4j.Log4j2;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;

@Component
@EnableScheduling
@Log4j2
public class MetricsCollectingService {

  @Getter
  @Setter
  @AllArgsConstructor
  @NoArgsConstructor
  private static class Hit {
    @With
    private long start;
    @With
    private long end;
  }

  private static final ConcurrentMap<String, Hit> hits = new ConcurrentHashMap<>();
  private static final ReentrantLock lock = new ReentrantLock();

  @Scheduled(fixedDelay = 500, initialDelay = 500)
  private void scheduledMetricsCollectionTask() {
    lock.lock();
    try {
      if (!hits.isEmpty()) {
        Map<String, Double> averages = hits.entrySet().stream()
          .filter(e -> e.getValue().getStart() != 0 && e.getValue().getEnd() != 0)
          .collect(Collectors.groupingBy(Map.Entry::getKey, Collectors.averagingDouble(x -> x.getValue().getEnd() - x.getValue().getStart())));
        averages.forEach((operation, average) -> log.info("Operation: {}  ---> Avg. {}", operation, average));
        averages.forEach((key, value) -> hits.remove(key));
        hits.forEach((key, value) -> log.info("Unpaired value: {}", key));
      }
    } finally {
      lock.unlock();
    }
  }

  public void startMetric(String requestId, MetricOperation operation) {
    var start = System.currentTimeMillis();
    log.info("Operation: {}, start: {}", requestId + "_" + operation.name(), start);
    hits.put(requestId + "_" + operation.name(), new Hit().withStart(start));
  }

  public void endMetric(String requestId, MetricOperation operation) {
    var end = System.currentTimeMillis();
    log.info("Operation: {}, end: {}", requestId + "_" + operation.name(), end);
    hits.put(requestId + "_" + operation.name(), hits.get(requestId + "_" + operation.name()).withEnd(end));
  }
}

Verified by unit test:

package org.folio.oaipmh.service.impl;

import io.vertx.core.Context;
import io.vertx.core.Vertx;
import io.vertx.junit5.VertxExtension;
import io.vertx.junit5.VertxTestContext;
import org.folio.config.ApplicationConfig;
import org.folio.oaipmh.service.MetricOperation;
import org.folio.oaipmh.service.MetricsCollectingService;
import org.folio.spring.SpringContextUtil;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.beans.factory.annotation.Autowired;

import java.util.concurrent.ThreadLocalRandom;

import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;

@TestInstance(PER_CLASS)
@ExtendWith(VertxExtension.class)
public class MetricsCollectingServiceTest {

  @Autowired
  MetricsCollectingService service;

  @BeforeAll
  void setUpClass(Vertx vertx, VertxTestContext testContext) {
    Context context = vertx.getOrCreateContext();
    SpringContextUtil.init(vertx, context, ApplicationConfig.class);
    SpringContextUtil.autowireDependencies(this, context);
    testContext.completeNow();
  }

  @Test
  void test() throws InterruptedException {
    for (int i = 0; i < 5; i++) {
      var operation = operation();
      service.startMetric(String.valueOf(i), operation);
      Thread.sleep(ThreadLocalRandom.current().nextInt(1000));
      service.endMetric(String.valueOf(i), operation);
      Thread.sleep(ThreadLocalRandom.current().nextInt(1000));
    }
    Thread.sleep(1000000);
  }

  private MetricOperation operation() {
    int pick = ThreadLocalRandom.current().nextInt(MetricOperation.values().length);
    return MetricOperation.values()[pick];
  }

}


Console output:

2021-12-06T13:34:22,971 INFO  [main                     ] MetricsCollectingService Operation: 0_SEND_REQUEST, start: 1638786862971
2021-12-06T13:34:23,467 INFO  [pool-2-thread-1          ] MetricsCollectingService Unpaired value: 0_SEND_REQUEST
2021-12-06T13:34:23,666 INFO  [main                     ] MetricsCollectingService Operation: 0_SEND_REQUEST, end: 1638786863666
2021-12-06T13:34:23,971 INFO  [pool-2-thread-1          ] MetricsCollectingService Operation: 0_SEND_REQUEST  ---> Avg. 695.0
2021-12-06T13:34:24,038 INFO  [main                     ] MetricsCollectingService Operation: 1_SEND_REQUEST, start: 1638786864038
2021-12-06T13:34:24,387 INFO  [main                     ] MetricsCollectingService Operation: 1_SEND_REQUEST, end: 1638786864387
2021-12-06T13:34:24,477 INFO  [pool-2-thread-1          ] MetricsCollectingService Operation: 1_SEND_REQUEST  ---> Avg. 349.0
2021-12-06T13:34:24,917 INFO  [main                     ] MetricsCollectingService Operation: 2_PARSE_XML, start: 1638786864917
2021-12-06T13:34:24,979 INFO  [pool-2-thread-1          ] MetricsCollectingService Unpaired value: 2_PARSE_XML
2021-12-06T13:34:25,483 INFO  [pool-2-thread-1          ] MetricsCollectingService Unpaired value: 2_PARSE_XML
2021-12-06T13:34:25,619 INFO  [main                     ] MetricsCollectingService Operation: 2_PARSE_XML, end: 1638786865619
2021-12-06T13:34:25,986 INFO  [pool-2-thread-1          ] MetricsCollectingService Operation: 2_PARSE_XML  ---> Avg. 702.0
2021-12-06T13:34:26,554 INFO  [main                     ] MetricsCollectingService Operation: 3_SEND_REQUEST, start: 1638786866554
2021-12-06T13:34:26,897 INFO  [main                     ] MetricsCollectingService Operation: 3_SEND_REQUEST, end: 1638786866897
2021-12-06T13:34:26,996 INFO  [pool-2-thread-1          ] MetricsCollectingService Operation: 3_SEND_REQUEST  ---> Avg. 343.0
2021-12-06T13:34:27,483 INFO  [main                     ] MetricsCollectingService Operation: 4_PARSE_XML, start: 1638786867483
2021-12-06T13:34:27,500 INFO  [pool-2-thread-1          ] MetricsCollectingService Unpaired value: 4_PARSE_XML
2021-12-06T13:34:27,767 INFO  [main                     ] MetricsCollectingService Operation: 4_PARSE_XML, end: 1638786867767
2021-12-06T13:34:28,004 INFO  [pool-2-thread-1          ] MetricsCollectingService Operation: 4_PARSE_XML  ---> Avg. 284.0