Minified Elasticsearch container

Sample script pieces to create a minified clone of a production Elasticsearch cluster.

step 1: export data from production elasticsearch

With the help of this script we will grab 1,000 documents from each index, plus their mappings and analysis settings, and save all of that to a data.json file.

# --- configuration ----------------------------------------------------------
$max = 1000                      # max documents to export per index
$endpoint = 'https://xxxxxx.es.europe-west3.gcp.cloud.es.io:9243'
$username = 'elastic'
$password = 'xxxxxxx'

# Basic-auth header used on every request against the production cluster.
$headers =  @{Authorization=("Basic {0}" -f ([Convert]::ToBase64String([Text.Encoding]::ASCII.GetBytes(("{0}:{1}" -f $username,$password)))))}

# Enumerate all aliases and indices up front via the _cat APIs.
$aliases = Invoke-RestMethod "$endpoint/_cat/aliases?format=json" -Headers $headers
$indices = Invoke-RestMethod "$endpoint/_cat/indices?format=json" -Headers $headers
$data = @()
$timer = [System.Diagnostics.Stopwatch]::StartNew()
foreach($index in $indices) {
    # Skip internal (dot-prefixed) and APM indices.
    if ($index.index.StartsWith('.')) { continue }
    if ($index.index.StartsWith('apm')) { continue }

    # -AsHashtable so we can index the response by the dynamic index name below
    # (ConvertFrom-Json's PSCustomObject output makes dynamic keys awkward).
    $settings = Invoke-WebRequest -UseBasicParsing "$endpoint/$($index.index)/_settings" -Headers $headers | Select-Object -ExpandProperty Content | ConvertFrom-Json -AsHashtable

    $mapping = Invoke-WebRequest -UseBasicParsing "$endpoint/$($index.index)/_mapping" -Headers $headers | Select-Object -ExpandProperty Content | ConvertFrom-Json -AsHashtable

    # Fetch up to $max documents with a plain match_all search.
    $docs = Invoke-RestMethod -Method POST "$endpoint/$($index.index)/_search" -Headers $headers -ContentType 'application/json' -Body (ConvertTo-Json -Depth 100 -InputObject @{
        query = @{ match_all = @{} }
        size = $max
    }) | Select-Object -ExpandProperty hits -ErrorAction SilentlyContinue | Select-Object -ExpandProperty hits -ErrorAction SilentlyContinue
    if (-not $docs) {
        $docs = @()   # empty index: normalize to an array so .Count works below
    }

    $alias = $aliases | Where-Object index -EQ $index.index | Select-Object -ExpandProperty alias

    # One export record per index: its mapping, sampled docs, alias (if any)
    # and custom analysis settings (if any).
    $data += [PSCustomObject]@{
        index = $index.index
        mapping = $mapping[$index.index]['mappings']
        docs = $docs
        alias = $alias
        analysis = $settings[$index.index]['settings']['index']['analysis']
    }

    # Write-Progress -Activity 'Exporting' -Status $index.index -PercentComplete ( [Array]::IndexOf($indices, $index) / $indices.Count * 100 )
    if ($alias) {
        Write-Host "$alias -> $($index.index) $($docs.Count) $($timer.Elapsed)"
    } else {
        Write-Host "$($index.index) $($docs.Count) $($timer.Elapsed)"
    }
}
ConvertTo-Json -Depth 100 -InputObject $data | Out-File -Encoding utf8 'data.json'
# FileInfo exposes the file size as 'Length' — there is no 'Size' property,
# so the original expression always produced an empty value.
$size = [math]::Round(( Get-ChildItem 'data.json' | Select-Object -ExpandProperty Length ) / 1024 / 1024)
$count = $data | Select-Object -ExpandProperty docs | Measure-Object | Select-Object -ExpandProperty Count
Write-Host "Done reading $($count) docs from $($indices.Count) indices ($($size)mb) in $($timer.Elapsed)"

step 2: import data into local elastic

With this script we start a local Elasticsearch and import the data from the previous step; after that we stop the container and grab its data directory.

# --- configuration ----------------------------------------------------------
$endpoint = 'http://localhost:9200'
$username = 'elastic'
$password = 'xxxxxxx'

# Basic-auth header for the local throw-away cluster.
$headers =  @{Authorization=("Basic {0}" -f ([Convert]::ToBase64String([Text.Encoding]::ASCII.GetBytes(("{0}:{1}" -f $username,$password)))))}

$timer = [System.Diagnostics.Stopwatch]::StartNew()
# Start a fresh single-node cluster, removing any leftover container first.
docker rm -f elastic
docker run -d --name=elastic -e discovery.type=single-node -e xpack.security.enabled=true -e ELASTIC_PASSWORD=$($password) -p 9200:9200 docker.elastic.co/elasticsearch/elasticsearch:8.5.0

# Poll until the cluster answers instead of hoping a fixed sleep is long
# enough (bounded at ~60s so a broken container still fails fast).
foreach ($attempt in 1..30) {
    try {
        Invoke-RestMethod "$endpoint/_cluster/health" -Headers $headers | Out-Null
        break
    } catch {
        Start-Sleep -Seconds 2
    }
}

$items = Get-Content -Encoding utf8 'data.json' | ConvertFrom-Json -AsHashtable
foreach ($item in $items) {
    Write-Host $item['index']

    # Recreate the index with its production mapping; no replicas needed on a
    # single-node dev cluster.
    $body = @{
        settings = @{
            number_of_replicas = 0
        }
        mappings = $item['mapping']
    }
    if ($item['analysis']) {
        $body.settings.analysis = $item['analysis']
    }
    $res = Invoke-RestMethod -Method Put "$($endpoint)/$($item['index'])" -Headers $headers -ContentType 'application/json;charset=utf-8' -Body (ConvertTo-Json -Depth 100 -InputObject $body)
    if ($res.acknowledged) {
        Write-Host "created"
    } else {
        $res | Out-Host
    }

    # Build an NDJSON bulk payload: one action line + one source line per doc.
    $ndjson = @()
    foreach($doc in $item['docs']) {
        $ndjson += ConvertTo-Json -Compress -InputObject @{
            index = @{
                _index = $doc._index
                _id = $doc._id
            }
        }
        $ndjson += ConvertTo-Json -Compress -Depth 100 -InputObject $doc._source
    }
    if ($ndjson.Count -gt 0) {
        # The _bulk API requires a trailing newline after the last line.
        $res = Invoke-RestMethod -Method Post "$($endpoint)/_bulk" -Headers $headers -ContentType 'application/x-ndjson;charset=utf-8' -Body (($ndjson -join "`n") + "`n")
        if (-not $res.errors) {
            Write-Host "$($item['docs'].Count) docs added"
        } else {
            $res | Out-Host
        }
    }

    # Restore the production alias, if the index had one.
    if ($item['alias']) {
        $res = Invoke-RestMethod -Method Post "$($endpoint)/_aliases" -Headers $headers -ContentType 'application/json' -Body (ConvertTo-Json -Depth 100 -InputObject @{
            actions = @(@{
                add = @{
                    index = $item['index']
                    alias = $item['alias']
                }
            })
        })
        if ($res.acknowledged) {
            Write-Host "$($item['alias']) -> $($item['index'])"
        }
    }
}
# Flush so everything is persisted to disk before the container is stopped.
Invoke-RestMethod -Method Post "$($endpoint)/_flush" -Headers $headers
Start-Sleep -Seconds 10

# Stop the container and copy its data directory out for the image build.
docker stop elastic
if (Test-Path data) {
    Remove-Item data -Force -Recurse
}
docker cp elastic:/usr/share/elasticsearch/data .
docker rm elastic

# FileInfo's size property is 'Length', not 'Size' — summing 'Size' silently
# yields nothing, making the reported size always 0.
$size = [math]::Round((Get-ChildItem data -Recurse -ErrorAction SilentlyContinue | Measure-Object -Property Length -Sum | Select-Object -ExpandProperty Sum) / 1024 / 1024)
Write-Host "Done restoring $($items.Count) indices ($($size)mb) in $($timer.Elapsed)"

step 3: create container

Now we are going to build our prepared image with the help of the following Dockerfile:

# Base image version must match the one the data directory was produced with
# (step 2 used 8.5.0), otherwise the copied indices may fail to load.
FROM docker.elastic.co/elasticsearch/elasticsearch:8.5.0

# Single-node cluster with security enabled and a fixed password,
# mirroring the container configuration used during the import step.
ENV discovery.type=single-node
ENV xpack.security.enabled=true
ENV ELASTIC_PASSWORD=xxxxxxx

# Bake the pre-built index data into the image; --chown so the elasticsearch
# user inside the container can read and write it.
COPY --chown=elasticsearch ./data/ /usr/share/elasticsearch/data/

EXPOSE 9200

# Build the image with the baked-in data and smoke-test it locally.
docker build -t elastic .
docker run -d -p 9200:9200 --name=elastic elastic

# Poll until Elasticsearch answers (up to ~60s) instead of a blind 'sleep 20';
# -f makes curl fail on HTTP errors so we keep retrying until the node is up.
for _ in {1..30}; do
  curl -fsS -u elastic:xxxxxxx http://localhost:9200/_cat/indices && break
  sleep 2
done
docker rm -f elastic

# Publish the prepared image to the registry.
docker tag elastic gcr.io/contoso/minielastic
docker push gcr.io/contoso/minielastic

And from now on, we may use this image for local development without being dependent on the internet.