Minified Elasticsearch container
Sample script pieces to create a minified clone of a production Elasticsearch.
Step 1: export data from production Elasticsearch
With the help of this script we will grab 1K documents from each index, plus their mappings and analysis settings, and save all of that to a
data.json file.
# --- Export up to $max documents per index (plus mappings, aliases and
# --- analysis settings) from a remote Elasticsearch into data.json.
$max = 1000
$endpoint = 'https://xxxxxx.es.europe-west3.gcp.cloud.es.io:9243'
$username = 'elastic'
$password = 'xxxxxxx'
# Basic-auth header built from "username:password".
$headers = @{Authorization=("Basic {0}" -f ([Convert]::ToBase64String([Text.Encoding]::ASCII.GetBytes(("{0}:{1}" -f $username,$password)))))}
$aliases = Invoke-RestMethod "$endpoint/_cat/aliases?format=json" -Headers $headers
$indices = Invoke-RestMethod "$endpoint/_cat/indices?format=json" -Headers $headers
# Generic list instead of '+=' on an array: array append copies the whole
# array on every add, which is O(n^2) over many indices.
$data = [System.Collections.Generic.List[object]]::new()
$timer = [System.Diagnostics.Stopwatch]::StartNew()
foreach($index in $indices) {
    # Skip system (dot-prefixed) and APM indices.
    if ($index.index.StartsWith('.')) { continue }
    if ($index.index.StartsWith('apm')) { continue }
    # -AsHashtable keeps index names containing dots/dashes usable as keys.
    $settings = Invoke-WebRequest -UseBasicParsing "$endpoint/$($index.index)/_settings" -Headers $headers | Select-Object -ExpandProperty Content | ConvertFrom-Json -AsHashtable
    $mapping = Invoke-WebRequest -UseBasicParsing "$endpoint/$($index.index)/_mapping" -Headers $headers | Select-Object -ExpandProperty Content | ConvertFrom-Json -AsHashtable
    # Grab up to $max documents via a match_all search; the double
    # -ExpandProperty hits unwraps the response's hits.hits array.
    $docs = Invoke-RestMethod -Method POST "$endpoint/$($index.index)/_search" -Headers $headers -ContentType 'application/json' -Body (ConvertTo-Json -Depth 100 -InputObject @{
        query = @{ match_all = @{} }
        size = $max
    }) | Select-Object -ExpandProperty hits -ErrorAction SilentlyContinue | Select-Object -ExpandProperty hits -ErrorAction SilentlyContinue
    if (-not $docs) {
        $docs = @()
    }
    $alias = $aliases | Where-Object index -EQ $index.index | Select-Object -ExpandProperty alias
    $data.Add([PSCustomObject]@{
        index = $index.index
        mapping = $mapping[$index.index]['mappings']
        docs = $docs
        alias = $alias
        analysis = $settings[$index.index]['settings']['index']['analysis']
    })
    # Write-Progress -Activity 'Exporting' -Status $index.index -PercentComplete ( [Array]::IndexOf($indices, $index) / $indices.Count * 100 )
    if ($alias) {
        Write-Host "$alias -> $($index.index) $($docs.Count) $($timer.Elapsed)"
    } else {
        Write-Host "$($index.index) $($docs.Count) $($timer.Elapsed)"
    }
}
ConvertTo-Json -Depth 100 -InputObject $data | Out-File -Encoding utf8 'data.json'
# BUGFIX: FileInfo exposes the byte count as 'Length', not 'Size' —
# expanding a non-existent property made $size always empty.
$size = [math]::Round(( Get-ChildItem 'data.json' | Select-Object -ExpandProperty Length ) / 1024 / 1024)
$count = $data | Select-Object -ExpandProperty docs | Measure-Object | Select-Object -ExpandProperty Count
Write-Host "Done reading $($count) docs from $($indices.Count) indices ($($size)mb) in $($timer.Elapsed)"
Step 2: import data into local Elasticsearch
With this script we start a local Elasticsearch and import the data from the previous step; after that we stop the container and grab its data directory.
# --- Import data.json into a fresh local Elasticsearch container, then stop
# --- the container and keep its data directory for baking into an image.
$endpoint = 'http://localhost:9200'
$username = 'elastic'
$password = 'xxxxxxx'
# Basic-auth header built from "username:password".
$headers = @{Authorization=("Basic {0}" -f ([Convert]::ToBase64String([Text.Encoding]::ASCII.GetBytes(("{0}:{1}" -f $username,$password)))))}
$timer = [System.Diagnostics.Stopwatch]::StartNew()
# Remove any leftover container from a previous run, then start a clean one.
docker rm -f elastic
docker run -d --name=elastic -e discovery.type=single-node -e xpack.security.enabled=true -e ELASTIC_PASSWORD=$($password) -p 9200:9200 docker.elastic.co/elasticsearch/elasticsearch:8.5.0
# Poll until Elasticsearch answers instead of hoping a fixed 20s sleep is
# enough (slow machines/first image pull routinely exceed 20s).
$deadline = (Get-Date).AddMinutes(2)
while ($true) {
    try {
        Invoke-RestMethod "$endpoint/" -Headers $headers | Out-Null
        break
    } catch {
        if ((Get-Date) -gt $deadline) { throw "Elasticsearch did not become ready within 2 minutes" }
        Start-Sleep -Seconds 2
    }
}
$items = Get-Content -Encoding utf8 'data.json' | ConvertFrom-Json -AsHashtable
foreach ($item in $items) {
    Write-Host $item['index']
    # Single-node cluster: replicas can never be allocated, so force 0
    # to keep the cluster health green.
    $body = @{
        settings = @{
            number_of_replicas = 0
        }
        mappings = $item['mapping']
    }
    if ($item['analysis']) {
        $body.settings.analysis = $item['analysis']
    }
    # Create the index with its original mapping and analysis settings.
    $res = Invoke-RestMethod -Method Put "$($endpoint)/$($item['index'])" -Headers $headers -ContentType 'application/json;charset=utf-8' -Body (ConvertTo-Json -Depth 100 -InputObject $body)
    if ($res.acknowledged) {
        Write-Host "created"
    } else {
        $res | Out-Host
    }
    # Build the newline-delimited _bulk payload: one action line plus one
    # source line per document, preserving the original _id.
    $ndjson = @()
    foreach($doc in $item['docs']) {
        $ndjson += ConvertTo-Json -Compress -InputObject @{
            index = @{
                _index = $doc._index
                _id = $doc._id
            }
        }
        $ndjson += ConvertTo-Json -Compress -Depth 100 -InputObject $doc._source
    }
    if ($ndjson.Count -gt 0) {
        # _bulk requires a trailing newline after the last line.
        $res = Invoke-RestMethod -Method Post "$($endpoint)/_bulk" -Headers $headers -ContentType 'application/x-ndjson;charset=utf-8' -Body (($ndjson -join "`n") + "`n")
        if (-not $res.errors) {
            Write-Host "$($item['docs'].Count) docs added"
        } else {
            $res | Out-Host
        }
    }
    # Recreate the alias, if the index had one in production.
    if ($item['alias']) {
        $res = Invoke-RestMethod -Method Post "$($endpoint)/_aliases" -Headers $headers -ContentType 'application/json' -Body (ConvertTo-Json -Depth 100 -InputObject @{
            actions = @(@{
                add = @{
                    index = $item['index']
                    alias = $item['alias']
                }
            })
        })
        if ($res.acknowledged) {
            Write-Host "$($item['alias']) -> $($item['index'])"
        }
    }
}
# Flush so everything is persisted to disk before we snapshot the data dir.
Invoke-RestMethod -Method Post "$($endpoint)/_flush" -Headers $headers
Start-Sleep -Seconds 10
docker stop elastic
if (Test-Path data) {
    Remove-Item data -Force -Recurse
}
# Copy the populated data directory out of the stopped container.
docker cp elastic:/usr/share/elasticsearch/data .
docker rm elastic
# BUGFIX: FileInfo exposes the byte count as 'Length', not 'Size' — summing a
# non-existent property always produced 0. -File skips directory entries,
# which have no Length.
$size = [math]::Round((Get-ChildItem data -Recurse -File -ErrorAction SilentlyContinue | Measure-Object -Property Length -Sum | Select-Object -ExpandProperty Sum) / 1024 / 1024)
Write-Host "Done restoring $($items.Count) indices ($($size)mb) in $($timer.Elapsed)"
Step 3: create the container
Now we are going to build our prepared image with the help of the following Dockerfile:
# Base image: same Elasticsearch version the data directory was produced with,
# so the copied indices are compatible on-disk.
FROM docker.elastic.co/elasticsearch/elasticsearch:8.5.0
# Same single-node / security settings used while importing the data.
ENV discovery.type=single-node
ENV xpack.security.enabled=true
# NOTE(review): the password is baked into the image — acceptable only for
# local development images; do not reuse a real production password here.
ENV ELASTIC_PASSWORD=xxxxxxx
# Pre-populated data directory from step 2; --chown lets the elasticsearch
# user inside the container read and write it.
COPY --chown=elasticsearch ./data/ /usr/share/elasticsearch/data/
EXPOSE 9200
# Build the prepared image, smoke-test it locally, and push it to the registry.
# Abort on the first failure so a broken image is never tagged or pushed.
set -euo pipefail

docker build -t elastic .
docker run -d -p 9200:9200 --name=elastic elastic
# Give Elasticsearch time to boot before probing it.
sleep 20
# Smoke test: list indices. -f makes curl return non-zero on an HTTP error,
# which (with set -e) stops the script before the tag/push steps.
curl -fsS -u elastic:xxxxxxx http://localhost:9200/_cat/indices
docker rm -f elastic
docker tag elastic gcr.io/contoso/minielastic
docker push gcr.io/contoso/minielastic
And from now on, we may use this image for local development without depending on internet access.