Skip to content

Commit

Permalink
test: add scale-test to workflow
Browse files (browse the repository at this point in the history)
Signed-off-by: Alex Castilio dos Santos <[email protected]>
  • Loading branch information
alexcastilio committed Dec 18, 2024
1 parent 3e1a126 commit 69fe647
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 240 deletions.
8 changes: 5 additions & 3 deletions .github/workflows/scale-test-v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,7 @@ jobs:
run: |
set -euo pipefail
# Placeholder for test
sleep 300 &
echo "TEST_PID=$!" >> $GITHUB_ENV
go test ./test/e2e/. -v -tags=scale -timeout 300s -args -image-tag=$(make version) -create-infra=false -delete-infra=false &
- name: Clone ClusterLoader2
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
Expand Down Expand Up @@ -174,7 +173,10 @@ jobs:
- name: Stop test
shell: bash
run: kill $TEST_PID
run: |
PID=$(ps aux | grep "go test" | awk '{print $2}')
echo $PID
kill -s 15 $PID
cleanup:
name: Cleanup
Expand Down
14 changes: 13 additions & 1 deletion test/e2e/framework/scaletest/get-publish-metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import (
"fmt"
"log"
"os"
"os/signal"
"sync"
"syscall"
"time"

"github.com/microsoft/retina/pkg/telemetry"
Expand Down Expand Up @@ -44,6 +46,11 @@ func (g *GetAndPublishMetrics) Run() error {
}

g.stop = make(chan struct{})

sigs := make(chan os.Signal, 1)

signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)

g.wg.Add(1)

go func() {
Expand All @@ -66,9 +73,14 @@ func (g *GetAndPublishMetrics) Run() error {

}
}

}()

sig := <-sigs

fmt.Println()
fmt.Println("syscall received: ", sig)
g.stop <- struct{}{}

return nil
}

Expand Down
38 changes: 3 additions & 35 deletions test/e2e/framework/scaletest/options.go
Original file line number Diff line number Diff line change
@@ -1,40 +1,8 @@
package scaletest

import "time"

// Options holds parameters for the scale test
type Options struct {
Namespace string
MaxKwokPodsPerNode int
NumKwokDeployments int
NumKwokReplicas int
MaxRealPodsPerNode int
NumRealDeployments int
RealPodType string
NumRealReplicas int
NumRealServices int
NumNetworkPolicies int
NumUnapliedNetworkPolicies int
NumUniqueLabelsPerPod int
NumUniqueLabelsPerDeployment int
NumSharedLabelsPerPod int
KubeconfigPath string
RestartNpmPods bool
DebugExitAfterPrintCounts bool
DebugExitAfterGeneration bool
SleepAfterCreation time.Duration
DeleteKwokPods bool
DeleteRealPods bool
DeletePodsInterval time.Duration
DeletePodsTimes int
DeleteLabels bool
DeleteLabelsInterval time.Duration
DeleteLabelsTimes int
DeleteNetworkPolicies bool
DeleteNetworkPoliciesInterval time.Duration
DeleteNetworkPoliciesTimes int
numKwokPods int
numRealPods int
LabelsToGetMetrics map[string]string
AdditionalTelemetryProperty map[string]string
KubeconfigPath string
LabelsToGetMetrics map[string]string
AdditionalTelemetryProperty map[string]string
}
49 changes: 0 additions & 49 deletions test/e2e/framework/scaletest/validate-options.go

This file was deleted.

95 changes: 4 additions & 91 deletions test/e2e/jobs/scale.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,116 +2,29 @@ package retina

import (
"os"
"time"

"github.com/microsoft/retina/test/e2e/framework/kubernetes"
"github.com/microsoft/retina/test/e2e/framework/scaletest"
"github.com/microsoft/retina/test/e2e/framework/types"
)

func DefaultScaleTestOptions() scaletest.Options {
return scaletest.Options{
Namespace: "scale-test",
MaxKwokPodsPerNode: 0,
NumKwokDeployments: 0,
NumKwokReplicas: 0,
MaxRealPodsPerNode: 100,
NumRealDeployments: 1000,
RealPodType: "kapinger",
NumRealReplicas: 40,
NumRealServices: 1000,
NumNetworkPolicies: 10,
NumUnapliedNetworkPolicies: 10,
NumUniqueLabelsPerPod: 0,
NumUniqueLabelsPerDeployment: 1,
NumSharedLabelsPerPod: 3,
KubeconfigPath: "",
RestartNpmPods: false,
SleepAfterCreation: 0,
DeleteKwokPods: false,
DeletePodsInterval: 60 * time.Second,
DeleteRealPods: false,
DeletePodsTimes: 1,
DeleteLabels: false,
DeleteLabelsInterval: 60 * time.Second,
DeleteLabelsTimes: 1,
DeleteNetworkPolicies: false,
DeleteNetworkPoliciesInterval: 60 * time.Second,
DeleteNetworkPoliciesTimes: 1,
LabelsToGetMetrics: map[string]string{},
AdditionalTelemetryProperty: map[string]string{},
LabelsToGetMetrics: map[string]string{},
AdditionalTelemetryProperty: map[string]string{},
}
}

func ScaleTest(opt *scaletest.Options) *types.Job {
job := types.NewJob("Scale Test")

job.AddStep(&scaletest.ValidateAndPrintOptions{
Options: opt,
}, nil)

job.AddStep(&scaletest.ValidateNumOfNodes{
KubeConfigFilePath: opt.KubeconfigPath,
Label: map[string]string{"scale-test": "true"},
NumNodesRequired: (opt.NumRealDeployments*opt.NumRealReplicas +
opt.MaxRealPodsPerNode - 1) / opt.MaxRealPodsPerNode,
}, nil)

job.AddStep(&kubernetes.DeleteNamespace{
Namespace: opt.Namespace,
}, nil)

job.AddStep(&kubernetes.CreateNamespace{}, nil)

job.AddStep(&scaletest.GetAndPublishMetrics{
KubeConfigFilePath: opt.KubeconfigPath,
Labels: opt.LabelsToGetMetrics,
AdditionalTelemetryProperty: opt.AdditionalTelemetryProperty,
OutputFilePath: os.Getenv("OUTPUT_FILEPATH"),
OutputFilePath: os.Getenv("OUTPUT_FILEPATH"),
}, &types.StepOptions{
SkipSavingParametersToJob: true,
RunInBackgroundWithID: "get-metrics",
})

job.AddStep(&scaletest.CreateResources{
NumKwokDeployments: opt.NumKwokDeployments,
NumKwokReplicas: opt.NumKwokReplicas,
RealPodType: opt.RealPodType,
NumRealDeployments: opt.NumRealDeployments,
NumRealReplicas: opt.NumRealReplicas,
NumRealServices: opt.NumRealServices,
NumUniqueLabelsPerDeployment: opt.NumUniqueLabelsPerDeployment,
}, nil)

job.AddStep(&scaletest.AddSharedLabelsToAllPods{
NumSharedLabelsPerPod: opt.NumSharedLabelsPerPod,
}, nil)

job.AddStep(&scaletest.AddUniqueLabelsToAllPods{
NumUniqueLabelsPerPod: opt.NumUniqueLabelsPerPod,
}, nil)

// Apply network policies (applied and unapplied)
job.AddStep(&scaletest.CreateNetworkPolicies{
NumNetworkPolicies: opt.NumNetworkPolicies,
NumSharedLabelsPerPod: opt.NumSharedLabelsPerPod,
}, nil)

job.AddStep(&kubernetes.WaitPodsReady{
LabelSelector: "is-real=true",
}, nil)

job.AddStep(&scaletest.DeleteAndReAddLabels{
DeleteLabels: opt.DeleteLabels,
DeleteLabelsInterval: opt.DeleteLabelsInterval,
DeleteLabelsTimes: opt.DeleteLabelsTimes,
NumSharedLabelsPerPod: opt.NumSharedLabelsPerPod,
}, nil)

job.AddStep(&types.Stop{
BackgroundID: "get-metrics",
}, nil)

job.AddStep(&kubernetes.DeleteNamespace{}, nil)

return job
}
64 changes: 3 additions & 61 deletions test/e2e/scale_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,8 @@
package retina

import (
"crypto/rand"
"math/big"
"os"
"path/filepath"
"strconv"
"testing"

"github.com/microsoft/retina/test/e2e/common"
Expand All @@ -28,57 +25,19 @@ func TestE2ERetina_Scale(t *testing.T) {
subID := os.Getenv("AZURE_SUBSCRIPTION_ID")
require.NotEmpty(t, subID)

location := os.Getenv("AZURE_LOCATION")
if location == "" {
nBig, err := rand.Int(rand.Reader, big.NewInt(int64(len(common.AzureLocations))))
if err != nil {
t.Fatal("Failed to generate a secure random index", err)
}
location = common.AzureLocations[nBig.Int64()]
}

rg := os.Getenv("AZURE_RESOURCE_GROUP")
if rg == "" {
// Use the cluster name as the resource group name by default.
rg = clusterName
}

cwd, err := os.Getwd()
require.NoError(t, err)

// Get to root of the repo by going up two directories
rootDir := filepath.Dir(filepath.Dir(cwd))

chartPath := filepath.Join(rootDir, "deploy", "legacy", "manifests", "controller", "helm", "retina")
kubeConfigFilePath := filepath.Join(rootDir, "test", "e2e", "test.pem")
kubeConfigFilePath := filepath.Join(os.Getenv("HOME"), ".kube", "config")

// Scale test parameters
opt := jobs.DefaultScaleTestOptions()
opt.KubeconfigPath = kubeConfigFilePath

NumDeployments := os.Getenv("NUM_DEPLOYMENTS")
NumReplicas := os.Getenv("NUM_REPLICAS")
NumNetworkPolicies := os.Getenv("NUM_NET_POL")
CleanUp := os.Getenv("CLEANUP")

if NumDeployments != "" {
opt.NumRealDeployments, err = strconv.Atoi(NumDeployments)
opt.NumRealServices = opt.NumRealDeployments
require.NoError(t, err)
}
if NumReplicas != "" {
opt.NumRealReplicas, err = strconv.Atoi(NumReplicas)
require.NoError(t, err)
}
if NumNetworkPolicies != "" {
opt.NumNetworkPolicies, err = strconv.Atoi(NumNetworkPolicies)
require.NoError(t, err)
}
if CleanUp != "" {
opt.DeleteLabels, err = strconv.ParseBool(CleanUp)
require.NoError(t, err)
}

// TODO: Get Retina Version from cluster or change ENV VAR
RetinaVersion := os.Getenv(generic.DefaultTagEnv)
require.NotEmpty(t, RetinaVersion)
opt.AdditionalTelemetryProperty["retinaVersion"] = RetinaVersion
Expand All @@ -87,30 +46,13 @@ func TestE2ERetina_Scale(t *testing.T) {
// AppInsightsKey is required for telemetry
require.NotEmpty(t, os.Getenv(common.AzureAppInsightsKeyEnv))

// Agent label
opt.LabelsToGetMetrics = map[string]string{"k8s-app": "retina"}

// CreateTestInfra
createTestInfra := types.NewRunner(t, jobs.CreateTestInfra(subID, rg, clusterName, location, kubeConfigFilePath, *common.CreateInfra))
createTestInfra.Run(ctx)

t.Cleanup(func() {
if *common.DeleteInfra {
_ = jobs.DeleteTestInfra(subID, rg, clusterName, location).Run()
}
})

fqdn, err := azure.GetFqdnFn(subID, rg, clusterName)
require.NoError(t, err)
opt.AdditionalTelemetryProperty["clusterFqdn"] = fqdn

// Install Retina
installRetina := types.NewRunner(t, jobs.InstallRetina(kubeConfigFilePath, chartPath))
installRetina.Run(ctx)

t.Cleanup(func() {
_ = jobs.UninstallRetina(kubeConfigFilePath, chartPath).Run()
})

scale := types.NewRunner(t, jobs.ScaleTest(&opt))
scale.Run(ctx)
}

0 comments on commit 69fe647

Please sign in to comment.