Calculate costs per pod label from the Prometheus metrics provided by Kubecost

kubernetes aws prometheus

Kubecost is a tool for visualizing and optimizing Kubernetes costs. AWS provides an EKS-optimized bundle, in which some Enterprise features are available.

Install it with Helm. Prometheus is installed by default, but you can also use an existing one.

Install Prometheus with CDK and remote write aggregated data to New Relic with recording rules to save the amount of data - sambaiz-net

// Install the Kubecost cost-analyzer chart from the public ECR OCI registry
// as release `kubecost` in the `kubecost` namespace.
cluster.addHelmChart('KubecostHelmChart', {
  chart: 'cost-analyzer',
  repository: 'oci://public.ecr.aws/kubecost/cost-analyzer',
  namespace: 'kubecost',
  release: 'kubecost',
  version: '2.3.1',
  values: {
    global: {
      // The bundled Grafana is turned off.
      grafana: {
        enabled: false,
        proxy: false,
      },
      // Uncomment to point Kubecost at an existing Prometheus instead of the bundled one.
      /*
      prometheus: {
        enabled: false, // If false, use an existing Prometheus install.
        fqdn: "http://kube-prometheus-stack-prometheus.prometheus.svc.cluster.local:9090",
      },
      */
    },
    // Uncomment when the existing Prometheus is managed by the Prometheus operator.
    /*
    serviceMonitor: {
      enabled: true, // Set this to true to create ServiceMonitor for Prometheus operator
      additionalLabels: {
        release: 'kube-prometheus-stack',
      },
    },
    */
    // Set the `cluster_id` external label of the bundled Prometheus server to the cluster name.
    prometheus: {
      server: {
        global: {
          external_labels: {
            cluster_id: cluster.clusterName,
          },
        },
      },
    },
    // The settings below follow the EKS cost-monitoring values file:
    // https://raw.githubusercontent.com/kubecost/cost-analyzer-helm-chart/develop/cost-analyzer/values-eks-cost-monitoring.yaml
    kubecostFrontend: {
      image: 'public.ecr.aws/kubecost/frontend',
    },
    kubecostModel: {
      image: 'public.ecr.aws/kubecost/cost-model',
    },
    forecasting: {
      enabled: false,
    },
  },
})

On the dashboard, you can see how efficiently resources are being used,

and learn how to optimize costs.

Kubecost provides the following Prometheus metrics. The hourly CPU and RAM costs are derived by splitting the instance cost between CPU and RAM according to the unit prices in the configuration file.

# Cumulative cpu time consumed
container_cpu_usage_seconds_total{cpu="total", endpoint="https-metrics", id="/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod24c72e02_8120_4259_a3af_7f6030bfbced.slice", instance="10.0.159.230:10250", job="kubelet", metrics_path="/metrics/cadvisor", namespace="kube-system", node="ip-10-0-159-230.ec2.internal", pod="kube-proxy-9wr4l", service="kube-prometheus-stack-kubelet"}

# Kubernetes labels converted to Prometheus labels
kube_pod_labels{container="cost-model", endpoint="tcp-model", instance="10.0.145.101:9003", job="kubecost-cost-analyzer", label_controller_revision_hash="6c75fb6796", label_k8s_app="kube-proxy", label_pod_template_generation="1", namespace="kube-system", pod="kube-proxy-9wr4l", service="kubecost-cost-analyzer", uid="24c72e02-8120-4259-a3af-7f6030bfbced"}

# Hourly cost per vCPU on this node
node_cpu_hourly_cost{arch="amd64", container="cost-model", endpoint="tcp-model", instance="ip-10-0-159-230.ec2.internal", instance_type="m5.2xlarge", job="kubecost-cost-analyzer", namespace="kubecost", node="ip-10-0-159-230.ec2.internal", pod="kubecost-cost-analyzer-8cb64b444-5whwh", provider_id="aws:///us-east-1a/i-03d19e8ad331842a5", region="us-east-1", service="kubecost-cost-analyzer"}

You can calculate hourly costs per label as follows. Note that container_cpu_usage_seconds_total is measured in seconds, whereas node_cpu_hourly_cost and node_ram_hourly_cost are per-hour rates, so the units must be converted.

Get and aggregate metrics with PromQL - sambaiz-net

# Hourly cost per `label_k8s_app` value: (CPU cost + RAM cost) of each pod, summed by label.
sum(
  # CPU: CPU-seconds consumed in the last hour, priced at the node's hourly vCPU cost.
  max(max_over_time(kube_pod_labels{label_k8s_app!=""}[1h])) by (pod, label_k8s_app)
  * on (pod) group_left()
  sum(
    increase(container_cpu_usage_seconds_total{container!=""}[1h])
    * on(node) group_left() min(min_over_time(node_cpu_hourly_cost[1h])) by (node)
    / 3600  # CPU-seconds -> CPU-hours
  ) by (pod)

  + on (pod) group_left()
  # RAM: average working set over the last hour, priced at the node's hourly RAM cost.
  max(max_over_time(kube_pod_labels{label_k8s_app!=""}[1h])) by (pod, label_k8s_app)
  * on (pod) group_left()
  sum(
    # container!="" excludes the pod-level cgroup series, as in the CPU term,
    # so memory is not counted twice when summing by pod.
    avg_over_time(container_memory_working_set_bytes{container!=""}[1h])
    * on(node) group_left() min(min_over_time(node_ram_hourly_cost[1h])) by (node)
    / 1024 / 1024 / 1024  # bytes -> GiB
  ) by (pod)
) by (label_k8s_app)

By default, instance costs are calculated at the on-demand rate. To reflect Spot rates, you can integrate the Cost and Usage Report, but it can take up to 48 hours to update. Another method is to read the Spot Instance data feed, which stores the rates for each Spot Instance in the bucket specified with createSpotDatafeedSubscription. In this case, without the s3:*Acl permission I got a “the specified bucket does not exist or does not have enough permissions” error, so I added it. Note that only one data feed subscription can be registered per account.

Call AWS API with AwsCustomResource in CDK - sambaiz-net

// Bucket that receives the EC2 Spot Instance data feed read by Kubecost.
const spotDataFeedBucket = new s3.Bucket(this, 'KubecostSpotDatafeedBucket', {
  bucketName: `${cluster.clusterName}-kubecost-spot-datafeed`,
  // ACL-related settings follow the Spot data feed guide:
  // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-data-feeds.html#using-spot-instances-dfs3
  objectOwnership: s3.ObjectOwnership.OBJECT_WRITER,
  accessControl: s3.BucketAccessControl.BUCKET_OWNER_FULL_CONTROL,
  // The bucket is removed together with the stack.
  removalPolicy: cdk.RemovalPolicy.DESTROY,
})

// https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateSpotDatafeedSubscription.html
// Subscribe the account to the Spot Instance data feed on stack creation and
// unsubscribe on deletion, by calling the EC2 API through a custom resource.
// https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateSpotDatafeedSubscription.html
const { AwsCustomResource, AwsCustomResourcePolicy, PhysicalResourceId } = cdk.custom_resources

const spotDatafeedSubscription = new AwsCustomResource(this, 'CreateSpotDatafeedSubscription', {
  onCreate: {
    service: 'ec2',
    action: 'createSpotDatafeedSubscription',
    parameters: { Bucket: spotDataFeedBucket.bucketName },
    // The bucket name doubles as the physical ID of the custom resource.
    physicalResourceId: PhysicalResourceId.of(spotDataFeedBucket.bucketName),
  },
  onDelete: {
    service: 'ec2',
    action: 'deleteSpotDatafeedSubscription',
  },
  policy: AwsCustomResourcePolicy.fromStatements([
    // Permission to manage the subscription itself.
    new iam.PolicyStatement({
      actions: ['ec2:CreateSpotDatafeedSubscription', 'ec2:DeleteSpotDatafeedSubscription'],
      resources: ['*'],
    }),
    // ACL access on the target bucket; without it the create call failed with
    // "the specified bucket does not exist or does not have enough permissions".
    new iam.PolicyStatement({
      actions: ['s3:*Acl'],
      resources: [spotDataFeedBucket.bucketArn],
    }),
  ]),
})

// The bucket must exist before the subscription is created.
spotDatafeedSubscription.node.addDependency(spotDataFeedBucket)

If you specify this bucket in kubecostProductConfigs and grant read permissions to the ServiceAccount, Spot pricing will be reflected. The spotLabel setting, which identifies Spot Instances, automatically refers to the labels of managed node groups and Karpenter, so you do not need to set it if you are using either of these.

// IAM role assumed by the `kubecost-cost-analyzer` ServiceAccount in the
// `kubecost` namespace through the cluster's OIDC provider.
const oidcProviderArn = cluster.openIdConnectProvider.openIdConnectProviderArn
const oidcIssuer = cluster.clusterOpenIdConnectIssuer

// The condition keys contain the issuer token, so the map is wrapped in CfnJson.
const kubecostTrustCondition = new cdk.CfnJson(this, 'KubecostRoleStringEquals', {
  value: {
    [`${oidcIssuer}:aud`]: 'sts.amazonaws.com',
    [`${oidcIssuer}:sub`]: 'system:serviceaccount:kubecost:kubecost-cost-analyzer',
  },
})

// Read access to the Spot data feed bucket and its objects.
const spotDataFeedReadPolicy = new iam.PolicyDocument({
  statements: [
    new iam.PolicyStatement({
      actions: ['s3:ListAllMyBuckets', 's3:ListBucket', 's3:List*', 's3:Get*'],
      resources: [spotDataFeedBucket.bucketArn, `${spotDataFeedBucket.bucketArn}/*`],
    }),
  ],
})

const role = new iam.Role(this, 'KubecostRole', {
  roleName: `${cluster.clusterName}-kubecost`,
  assumedBy: new iam.WebIdentityPrincipal(oidcProviderArn, {
    StringEquals: kubecostTrustCondition,
  }),
  inlinePolicies: {
    'SpotDataFeed': spotDataFeedReadPolicy,
  },
})

// Excerpt of the Helm chart definition shown earlier; `...` stands for the
// options already listed there. Only the Spot-related additions are shown.
cluster.addHelmChart('KubecostHelmChart', {
  ...
  values: {
    ...
    // Tell Kubecost where the Spot data feed is stored.
    kubecostProductConfigs: {
      projectID: cdk.Stack.of(this).account, // AWS account of this stack
      awsSpotDataRegion: cdk.Stack.of(this).region, // region of this stack
      awsSpotDataBucket: spotDataFeedBucket.bucketName,
    },
    // Bind the ServiceAccount to the IAM role that can read the bucket.
    serviceAccount: {
      annotations: {
        'eks.amazonaws.com/role-arn': role.roleArn,
      },
    }
  },
})