#!/bin/sh
#/ Usage: ghe-backup-es-rsync
#/ Take an online, incremental snapshot of Elasticsearch indices.
#/
#/ Note: This command typically isn't called directly. It's invoked by
#/ ghe-backup when the rsync strategy is used.
# Abort as soon as any unchecked command fails.
set -e

# Bring in the backup configuration. The working directory is moved two levels
# above the directory holding this script so the relative path below resolves.
# The command substitution is quoted so that an install path containing
# whitespace or glob characters reaches cd as a single, unmodified argument.
cd "$(dirname "$0")/../.."
. share/github-backup-utils/ghe-backup-config

# Remote host to back up, as named by the backup configuration.
host=$GHE_HOSTNAME

# Perform a host-check and establish GHE_REMOTE_XXX variables.
ghe_remote_version_required "$host"

# Bail out early unless a runnable rsync is available locally.
rsync --version > /dev/null 2>&1 || {
    echo "Error: rsync not found." 1>&2
    exit 1
}

# Create the elasticsearch directory inside the snapshot; on a first run it
# does not exist yet.
mkdir -p "${GHE_SNAPSHOT_DIR}/elasticsearch"

# When the remote elasticsearch data directory is missing there is nothing to
# back up: say so on the progress stream (fd 3) and finish successfully.
ghe-ssh "$host" -- "[ -d '$GHE_REMOTE_DATA_USER_DIR/elasticsearch' ]" || {
    echo "* The '$GHE_REMOTE_DATA_USER_DIR/elasticsearch' directory doesn't exist." 1>&3
    exit 0
}

# Grab the elasticsearch.yml file which is root owned and mode -rw------- so
# can't be read via rsync or cat. We use the root allowed grep -F as a cat
# replacement. This is necessary on v11.10.x appliances only.
if [ "$GHE_VERSION_MAJOR" -lt 2 ]; then
    echo "* Retrieving elasticsearch.yml config file ..." 1>&3
    # The remote file is readable by root only, so the local copy must never
    # be readable by other users either. Create it under a restrictive umask
    # (in a subshell, so the umask of the rest of the script is untouched)
    # instead of creating it with default permissions and tightening it later.
    # With set -e in effect, a failure inside the subshell still aborts the
    # script.
    (
        umask 077
        ghe-ssh "$host" -- "sudo grep -F '' '$GHE_REMOTE_DATA_USER_DIR/elasticsearch/elasticsearch.yml'" \
            > "$GHE_SNAPSHOT_DIR/elasticsearch/elasticsearch.yml"
    )
    # Still needed: the umask only applies when the file is newly created, not
    # when an existing file is truncated and rewritten.
    chmod 0600 "$GHE_SNAPSHOT_DIR/elasticsearch/elasticsearch.yml"
fi

# If we have a previous increment, avoid transferring existing files via rsync's
# --link-dest support. This also decreases physical space usage considerably.
#
# link_dest is later expanded unquoted so that an empty value contributes no
# argument at all. Start from an explicitly empty value so that a link_dest
# variable inherited from the caller's environment can never end up on the
# rsync command line when there is no previous snapshot.
link_dest=
if [ -d "$GHE_DATA_DIR/current/elasticsearch" ]; then
    link_dest="--link-dest=../../current/elasticsearch"
fi

# Pick the remote account the rsync process runs as: elasticsearch for major
# version 2 and later, git for major version 1. Any other value is rejected as
# an invalid remote version.
if [ "$GHE_VERSION_MAJOR" -ge 2 ]; then
    rsync_user=elasticsearch
elif [ "$GHE_VERSION_MAJOR" -eq 1 ]; then
    rsync_user=git
else
    echo "Error: invalid remote version: $GHE_REMOTE_VERSION" >&2
    exit 1
fi

# First pass: pull the ES indices from the GitHub instance into the current
# snapshot directory. When a previous snapshot is available, $link_dest lets
# rsync reuse its files instead of transferring them again. The copy of
# elasticsearch.yml is excluded from the transfer. rsync output goes to the
# progress stream (fd 3).
printf '%s\n' "* Performing initial sync of ES indices ..." >&3
ghe-rsync -avz \
    -e "ghe-ssh -p $(ssh_port_part "$host")" \
    --rsync-path="sudo -u ${rsync_user} rsync" \
    $link_dest \
    --exclude=elasticsearch.yml \
    "$(ssh_host_part "$host"):${GHE_REMOTE_DATA_USER_DIR}/elasticsearch/" \
    "${GHE_SNAPSHOT_DIR}/elasticsearch" >&3

# cleanup: turn Elasticsearch index flushing back on.
#
# Announces the step on the progress stream (fd 3), then sends a settings
# document with translog.disable_flush=false to the ES instance on $host by
# piping it through ghe-ssh into curl. The response is discarded.
cleanup () {
    printf '%s\n' "* Enabling ES index flushing ..." >&3
    printf '%s\n' '{"index":{"translog.disable_flush":false}}' \
        | ghe-ssh "$host" -- curl -s -XPUT "localhost:9200/_settings" -d @- > /dev/null
}
# Run cleanup whenever the script exits so ES index flushing is re-enabled.
trap 'cleanup' EXIT
trap 'exit $?' INT # ^C always terminate
# POSIX only guarantees the EXIT trap when the shell exits on its own; not
# every /bin/sh runs it when the shell is killed by an untrapped signal. Turn
# hangup and termination requests into a regular exit as well, so cleanup
# still runs. Fixed 128+signal codes are used here because the last command to
# finish before the trap fires may have succeeded, and the script must not
# report success after being told to stop.
trap 'exit 129' HUP
trap 'exit 143' TERM

# Switch ES index flushing off on the remote host, then request one flush
# straight away. Both requests are issued with curl over ghe-ssh and their
# responses are discarded.
printf '%s\n' "* Disabling ES index flushing ..." >&3
printf '%s\n' '{"index":{"translog.disable_flush":true}}' \
    | ghe-ssh "$host" -- curl -s -XPUT "localhost:9200/_settings" -d @- > /dev/null
ghe-ssh "$host" -- curl -s -XPOST "localhost:9200/_flush" > /dev/null

# Second pass: run the same transfer of the ES indices again into the same
# snapshot directory, with the same options as the first pass. rsync output
# goes to the progress stream (fd 3).
printf '%s\n' "* Performing follow-up sync of ES indices ..." >&3
ghe-rsync -avz \
    -e "ghe-ssh -p $(ssh_port_part "$host")" \
    --rsync-path="sudo -u ${rsync_user} rsync" \
    $link_dest \
    --exclude=elasticsearch.yml \
    "$(ssh_host_part "$host"):${GHE_REMOTE_DATA_USER_DIR}/elasticsearch/" \
    "${GHE_SNAPSHOT_DIR}/elasticsearch" >&3
