Installation of Elasticsearch

This page describes the first installation of Elasticsearch on a single server. Elasticsearch is required in FOLIO from R3-2021 (Kiwi) onwards.

Assumed OS: Ubuntu 20.04.2 LTS

- Install Elasticsearch with Docker (Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.5/docker.html)
  sudo docker pull docker.elastic.co/elasticsearch/elasticsearch:7.5.2
  # Increase max map count:
  sudo sysctl -w vm.max_map_count=262144
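  # Optional: to make this setting survive a reboot, persist it in a sysctl configuration file.
  # A minimal sketch; the file name below is just a convention, any name under /etc/sysctl.d/ works:
  echo 'vm.max_map_count=262144' | sudo tee /etc/sysctl.d/99-elasticsearch.conf
  sudo sysctl --system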

# Spin up an Elasticsearch cluster with 3 nodes:

  # Create a file docker-compose.yml with the following contents in your working directory

version: '2.2'
services:
  es01:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
    container_name: es01
    environment:
      - node.name=es01
      - cluster.name=es-docker-cluster
      - discovery.seed_hosts=es02,es03
      - cluster.initial_master_nodes=es01,es02,es03
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - data01:/usr/share/elasticsearch/data
    ports:
      - 9200:9200
    networks:
      - elastic
  es02:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
    container_name: es02
    environment:
      - node.name=es02
      - cluster.name=es-docker-cluster
      - discovery.seed_hosts=es01,es03
      - cluster.initial_master_nodes=es01,es02,es03
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - data02:/usr/share/elasticsearch/data
    networks:
      - elastic
  es03:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.5.2
    container_name: es03
    environment:
      - node.name=es03
      - cluster.name=es-docker-cluster
      - discovery.seed_hosts=es01,es02
      - cluster.initial_master_nodes=es01,es02,es03
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - data03:/usr/share/elasticsearch/data
    networks:
      - elastic

volumes:
  data01:
    driver: local
  data02:
    driver: local
  data03:
    driver: local
networks:
  elastic:
    driver: bridge


  sudo docker-compose up -d
  # Check the ulimits (open files / processes) that apply inside Docker containers:
  sudo docker run --rm centos:7 /bin/bash -c 'ulimit -Hn && ulimit -Sn && ulimit -Hu && ulimit -Su'
  # Now point your browser to:
   http://localhost:9200/   --> "You know, for Search"
  # Elasticsearch has been started in cluster mode
  curl -X GET "localhost:9200/_cat/nodes?v&pretty"
  [ now following: https://github.com/folio-org/mod-search#configuring-elasticsearch ]

Install the Elasticsearch Plugins


  # Log in to the es01 Elasticsearch container:

  sudo docker exec -it es01 sh
  # Inside the container, do:

  cd /usr/share/elasticsearch
  # Install the plugins one after the other:
  elasticsearch-plugin install --batch analysis-icu
  elasticsearch-plugin install --batch analysis-kuromoji
  elasticsearch-plugin install --batch analysis-smartcn
  elasticsearch-plugin install --batch analysis-nori
  elasticsearch-plugin install --batch analysis-phonetic

  exit

  # Now do the same thing inside the other two Elasticsearch containers es02 and es03.

Now restart all three Elasticsearch containers, otherwise the plugins will not be enabled:

sudo docker restart es01
sudo docker restart es02
sudo docker restart es03

  # Note: Plugins will be re-loaded upon restart of the container, i.e. they need to be installed only once per container.

Check whether the plugins have been installed. Log in to one of the Docker containers:

sudo docker exec -it es01 sh
sh-4.2# cd plugins/
sh-4.2# ls
analysis-icu  analysis-kuromoji  analysis-nori  analysis-phonetic  analysis-smartcn
exit
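
Alternatively, the installed plugins can be listed per node through the Elasticsearch API (standard _cat endpoint), without logging in to the containers:

curl -X GET "localhost:9200/_cat/plugins?v"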


The following steps only need to be done when upgrading an existing FOLIO installation from one without Elasticsearch (e.g. Juniper) to one with Elasticsearch (e.g. also Juniper, or Kiwi).

+++ !!! STOP HERE if you are doing a fresh (first) install of a FOLIO system and return to your installation instructions !!! +++


  # Now re-deploy mod-search with environment variables that point to elasticsearch:
  [ following https://github.com/folio-org/mod-search#environment-variables ]
  cd ~/folio-install
  curl -X GET -o mod-search-1.4.4-module-descriptor.json -D - -w '\n' http://localhost:9130/_/proxy/modules/mod-search-1.4.4
  # If you are working with Juniper R2-2021-GA, use module version mod-search-1.4.3 instead!

vim mod-search-1.4.4-module-descriptor.json
  # Edit the following name/value pairs in the "env" section of the module descriptor:
      "name" : "ELASTICSEARCH_HOST",
      "value" : "10.9.X.X"             # The IP of your single server, or of the server on which you are running Elasticsearch
      "name" : "ELASTICSEARCH_URL",
      "value" : "http://10.9.X.X:9200"
      "name" : "ELASTICSEARCH_USERNAME",
      "value" : "elastic"
      "name" : "ELASTICSEARCH_PASSWORD",
      "value" : "s3cret"               # Or choose a safer password
      "name" : "INITIAL_LANGUAGES",
      "value" : "eng, ger, swe"
      "name" : "SYSTEM_USER_PASSWORD",
      "value" : "*******"              # Use the same value as you are using for mod-pubsub !
  curl -X DELETE -D - -w '\n' http://localhost:9130/_/proxy/modules/mod-search-1.4.4
  curl -i -w '\n' -X POST -H 'Content-type: application/json' -d @mod-search-1.4.4-module-descriptor.json http://localhost:9130/_/proxy/modules
  ./deploy-backend-module.sh mod-search-1.4.4 10.9.X.X [ here is the install script — will be moved to anticipated script location ]
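  # To verify the deployment, you can query Okapi's discovery endpoint for the module
  # (standard Okapi API; the instance id and URL in the response will differ on your system):
  curl -w '\n' -D - http://localhost:9130/_/discovery/modules/mod-search-1.4.4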
  # Enable mod-search for your tenant:

Create a file search-install.json with the following content:

[
  {
    "id": "mod-search-1.4.4",
    "action": "enable"
  }
]


  curl -w '\n' -D - -X POST -H "Content-type: application/json" -d @search-install.json http://localhost:9130/_/proxy/tenants/diku/install?simulate=true\&preRelease=false
  curl -w '\n' -D - -X POST -H "Content-type: application/json" -d @search-install.json http://localhost:9130/_/proxy/tenants/diku/install?deploy=false\&preRelease=false\&tenantParameters=loadReference%3Dtrue%2CloadSample%3Dfalse
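  # To confirm that mod-search is now enabled for the tenant, list the tenant's modules
  # (standard Okapi proxy API):
  curl -w '\n' http://localhost:9130/_/proxy/tenants/diku/modules | grep mod-search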

Create a new Frontend

# ui-inventory-es and ui-search need to be added to the frontend modules

  cd ~/platform-complete
  # Re-add mod-search-1.4.4, folio_search and folio_inventory-es in install.json (in case you had removed them):
  git restore install.json
  # Re-add mod-search-1.4.4 to okapi-install.json (in case you had removed it):
  git restore okapi-install.json
  # Re-add folio_search and folio_inventory-es to stripes-install.json (in case you had removed them):
  git restore stripes-install.json
  # Re-add @folio/search and @folio/inventory-es to package.json  (in case you had removed them):
  git restore package.json
  # Re-add @folio/search and @folio/inventory-es to stripes.config.js (in case you had removed them):
  Add them manually, because you have also changed the URL and possibly added custom branding.

# Post the frontend modules folio_search and folio_inventory-es to Okapi's install endpoint to enable them for your tenant:

Create a file search-ui-install.json with the following content:

[
  {
    "id": "folio_inventory-es-6.4.0",
    "action": "enable"
  },
  {
    "id": "folio_search-4.1.0",
    "action": "enable"
  }
]


  curl -w '\n' -D - -X POST -H "Content-type: application/json" -d @search-ui-install.json http://localhost:9130/_/proxy/tenants/diku/install?simulate=true\&preRelease=false
  curl -w '\n' -D - -X POST -H "Content-type: application/json" -d @search-ui-install.json http://localhost:9130/_/proxy/tenants/diku/install?preRelease=false

# Create a new FOLIO Stripes platform (with folio_search and folio_inventory-es)
# Installation of Stripes and nginx in a Docker container
  cd ~/platform-complete
  vim docker/Dockerfile
    ARG OKAPI_URL=https://<YOUR_DOMAIN_NAME>/okapi
    ARG TENANT_ID=diku  # Change to your TENANT_ID

  vim docker/nginx.conf
    server_name <YOUR_SERVER_NAME>;
    ...
    # back-end requests:
    location /okapi {
      rewrite ^/okapi/(.*) /$1 break;
      proxy_pass http://<YOUR_IP_ADDRESS>:9130/;

  vim stripes.config.js
    Edit tenant and Okapi-URL:
      okapi: { 'url':'https://<YOUR_DOMAIN_NAME>/okapi', 'tenant':'diku' },
      ...
      Add Logo and favicon as you like.

  # Build the docker container:
  cd ~/platform-complete
  sudo su
  docker build -f docker/Dockerfile --build-arg OKAPI_URL=https://<YOUR_DOMAIN_NAME>/okapi --build-arg TENANT_ID=diku -t stripes .
Step 1/19 : FROM node:15-alpine as stripes_build
...
Step 19/19 : ENTRYPOINT ["/usr/bin/entrypoint.sh"]
 ---> Running in a47dce4e3b3e
Removing intermediate container a47dce4e3b3e
 ---> 48a532266f21
Successfully built 48a532266f21
Successfully tagged stripes:latest

# That takes approx. 15 minutes to build
  # Start the Docker container. Redirect port 80 from the outside world to port 80 inside the container. When using SSL, port 443 also needs to be redirected (not covered by this documentation):
  # Attention! Make sure the old Stripes container has been stopped and removed.
  Release port 80:
  netstat -taupn | grep 80
  tcp6       0      0 :::80                   :::*                    LISTEN      216581/docker-proxy
  tcp        0      0 0.0.0.0:80              0.0.0.0:*               LISTEN      216573/docker-proxy
  kill 216581
  kill 216573
  # Stop nginx on your server (if it is still running): sudo service nginx stop

  # Now finally start the docker container:
  nohup docker run -d -p 80:80 stripes

  # Log in to the Docker container
  docker exec -it <id> sh
  # Check if your config file has been installed correctly:
  vi /etc/nginx/conf.d/default.conf
  # Follow the webserver log inside the container:
  tail -f /var/log/nginx/host.access.log
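
  # From the host, a quick check that the new Stripes container is serving on port 80
  # (an HTTP 200 response means nginx inside the container delivers the Stripes bundle):
  curl -I http://localhost/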

Create Elasticsearch Index

[# See what you can already do with your Elasticsearch installation now:
     https://github.com/folio-org/mod-search#recreating-elasticsearch-index ]
  # Create Elasticsearch index for the first time
  Assign the following permission to user diku_admin:
    search.index.inventory.reindex.post (Search - starts inventory reindex operation)
  Get a new Token:
   export TOKEN=$( curl -s -S -D - -H "X-Okapi-Tenant: diku" -H "Content-type: application/json" -H "Accept: application/json" -d '{ "tenant" : "diku", "username" : "diku_admin", "password" : "admin" }' http://localhost:9130/authn/login | grep -i "^x-okapi-token: " )
  curl -w '\n' -D - -X POST -H "$TOKEN" -H "X-Okapi-Tenant: diku" -H "Content-type: application/json" -d '{ "recreateIndex": true, "resourceName": "instance" }' http://localhost:9130/search/index/inventory/reindex
HTTP/1.1 200 OK
vary: origin
Content-Type: application/json
Date: Tue, 08 Feb 2022 10:52:12 GMT
transfer-encoding: chunked

{"id":"02c8e76a-0606-43f2-808e-86f3c48b65c6","jobStatus":"In progress","submittedDate":"2022-02-08T10:52:12.466+00:00"}

  okapi.log now shows a lot of activity: /inventory-view RES 200 mod-inventory-storage ...
  Posting to the endpoint /search/index/inventory/reindex has triggered actions on all 3 Elasticsearch containers (nodes), so the endpoint has been installed correctly.
  Indexing of 200,000 instances takes 5-6 minutes.
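  # While the reindex is running, you can also watch the document count grow directly in Elasticsearch
  # (standard _cat endpoint; the exact index names depend on tenant and mod-search version):
  curl -X GET "localhost:9200/_cat/indices?v"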

   # Monitoring reindex process ( https://github.com/folio-org/mod-search#monitoring-reindex-process )
There is no end-to-end monitoring implemented yet, but it is possible to monitor the process partially. To check how many records have been published to the Kafka topic, use the inventory API:
    curl -w '\n' -D - -X GET -H "$TOKEN" -H "X-Okapi-Tenant: diku" -H "Content-type: application/json" http://localhost:9130/instance-storage/reindex/02c8e76a-0606-43f2-808e-86f3c48b65c6
HTTP/1.1 200 OK
vary: origin
Content-Type: application/json
transfer-encoding: chunked

{
  "id" : "02c8e76a-0606-43f2-808e-86f3c48b65c6",
  "published" : 224823,
  "jobStatus" : "Ids published",
  "submittedDate" : "2022-02-08T10:52:12.466+00:00"
}
  The job status "Ids published" and the count of 224,823 records indicate that all instance IDs have been published.

  # Check what has actually been added to your Elasticsearch index:
  # To estimate the total number of records in the index, send a "match all" search query and check totalRecords:
  curl -w '\n' -D - -X GET -H "$TOKEN" -H "X-Okapi-Tenant: diku" -H "Content-type: application/json" http://localhost:9130/search/instances?query=id="*"
 {"totalRecords":224823,"instances":[{"id":"1399845b-6d1f-50fd-9ffc-5d94e06f550d","title":"Molekulare Motoren und künstliche Nanomaschinen : Energiewandlung in Polymeren ; Moritz Mickler ; Thorsten Hugel , Physik in unserer Zeit","contributors":[{"name":"Mickler, Moritz","primary":false},{"name":"Hugel, Thorsten","primary":false}]}, ...]}

  # Get a specific ID out of your elasticsearch index:
curl -w '\n' -D - -X GET -H "$TOKEN" -H "X-Okapi-Tenant: diku" -H "Content-type: application/json" http://localhost:9130/search/instances?query=id="365e1e50-5ae5-590c-913a-e15c0fb45dd6"
HTTP/1.1 200 OK
transfer-encoding: chunked

{"totalRecords":1,"instances":[{"id":"365e1e50-5ae5-590c-913a-e15c0fb45dd6","title":"Wie klein ist klein : Eine kurze Geschichte der Quanten ; Aus d. Amerikan. von Michael Schmidt","contributors":[{"name":"Ford, Kenneth W.","primary":false}],"publication":[{"publisher":"Ullstein","dateOfPublication":"2008"}]}]}

  # The main endpoint that provides search capabilities is GET /search/instances.
  # A sample query where "semantic" and "web" must both be part of the title:
  curl -w '\n' -D - -X GET -H "$TOKEN" -H "X-Okapi-Tenant: diku" -H "Content-type: application/json" http://localhost:9130/search/instances?query=title+all+"semantic+web"    => 1 hit
  # A sample query where "semantic" or "web" needs to be part of the title:
  curl -w '\n' -D - -X GET -H "$TOKEN" -H "X-Okapi-Tenant: diku" -H "Content-type: application/json" http://localhost:9130/search/instances?query=title+any+"semantic+web"  => 174 hits
  # Likewise, the Inventory app's Instance CQL query 'title any "semantic web"' returns 174 hits.
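
  # The /search/instances endpoint also accepts paging parameters; assuming the usual FOLIO
  # limit/offset query parameters, the first 5 hits of the query above would be fetched like this:
  curl -w '\n' -D - -X GET -H "$TOKEN" -H "X-Okapi-Tenant: diku" -H "Content-type: application/json" "http://localhost:9130/search/instances?query=title+any+%22semantic+web%22&limit=5&offset=0"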

# END of installation of elasticsearch and mod-search