下载kube-prometheus-stack
wget https://kkgithub.com/prometheus-community/helm-charts/releases/download/kube-prometheus-stack-65.1.1/kube-prometheus-stack-65.1.1.tgz
解压kube-prometheus-stack
tar -zxvf kube-prometheus-stack-65.1.1.tgz
创建命名空间
kubectl create ns kube-prom
helm安装kube-prometheus-stack
helm install kube-promethues-stack . -n kube-prom --debug
查看所有pod的启动情况
kubectl get pod -n kube-prom
查看pod的日志
当pod出现ErrImagePull错误时,使用如下命令查看具体下载失败的镜像
kubectl describe pod kube-promethues-stack-grafana-5679d4cbbf-7l25h -n kube-prom
不存在的镜像按照如下步骤操作,并把镜像拷贝到所有k8s节点。(这个操作需要重复多次,直到所有镜像都改成可以下载的国内镜像才行。)
#拉取镜像
docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/registry.k8s.io/ingress-nginx/kube-webhook-certgen:v20221220-controller-v1.5.1-58-g787ea74b6
#给镜像打tag
docker tag swr.cn-north-4.myhuaweicloud.com/ddn-k8s/registry.k8s.io/ingress-nginx/kube-webhook-certgen:v20221220-controller-v1.5.1-58-g787ea74b6 registry.k8s.io/ingress-nginx/kube-webhook-certgen:v20221220-controller-v1.5.1-58-g787ea74b6
#保存镜像
docker save -o kube-webhook-certgen.tar registry.k8s.io/ingress-nginx/kube-webhook-certgen:v20221220-controller-v1.5.1-58-g787ea74b6
#远程拷贝镜像
scp kube-webhook-certgen.tar root@10.0.2.12:/root
#加载镜像
docker load -i kube-webhook-certgen.tar
#拉取镜像
docker pull swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/grafana/grafana:11.2.1
#给镜像打tag
docker tag swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/grafana/grafana:11.2.1 docker.io/grafana/grafana:11.2.1
#保存镜像
docker save -o grafana.tar docker.io/grafana/grafana:11.2.1
#远程拷贝镜像
scp grafana.tar root@10.0.2.12:/root
#加载镜像
docker load -i grafana.tar
卸载kube-promethues-stack
参考
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
helm uninstall kube-promethues-stack -n kube-prom
删除 CRD 资源
kubectl delete crd alertmanagerconfigs.monitoring.coreos.com
kubectl delete crd alertmanagers.monitoring.coreos.com
kubectl delete crd podmonitors.monitoring.coreos.com
kubectl delete crd probes.monitoring.coreos.com
kubectl delete crd prometheusagents.monitoring.coreos.com
kubectl delete crd prometheuses.monitoring.coreos.com
kubectl delete crd prometheusrules.monitoring.coreos.com
kubectl delete crd scrapeconfigs.monitoring.coreos.com
kubectl delete crd servicemonitors.monitoring.coreos.com
kubectl delete crd thanosrulers.monitoring.coreos.com
访问
查看service
kubectl get svc -n kube-prom -o wide
默认情况下,只能在k8s集群内部访问Prometheus和Grafana。所以需要将service的type从ClusterIP修改为NodePort或者LoadBalancer。
kubectl edit svc kube-promethues-stack-kube-prometheus -n kube-prom
kubectl edit svc kube-promethues-stack-grafana -n kube-prom
访问Grafana面板,初始账号是admin,密码是prom-operator
package k8s_demo;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.yaml.snakeyaml.Yaml;
import com.alibaba.fastjson2.JSON;
import com.crd.models.V1PrometheusRule;
import com.crd.models.V1PrometheusRuleSpec;
import com.crd.models.V1PrometheusRuleSpecGroups;
import com.crd.models.V1PrometheusRuleSpecRules;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import io.kubernetes.client.openapi.ApiClient;
import io.kubernetes.client.openapi.ApiException;
import io.kubernetes.client.openapi.Configuration;
import io.kubernetes.client.openapi.apis.CustomObjectsApi;
import io.kubernetes.client.openapi.apis.CustomObjectsApi.APIcreateClusterCustomObjectRequest;
import io.kubernetes.client.openapi.apis.CustomObjectsApi.APIcreateNamespacedCustomObjectRequest;
import io.kubernetes.client.openapi.apis.CustomObjectsApi.APIdeleteNamespacedCustomObjectRequest;
import io.kubernetes.client.openapi.apis.CustomObjectsApi.APIlistClusterCustomObjectRequest;
import io.kubernetes.client.openapi.apis.CustomObjectsApi.APIlistNamespacedCustomObjectRequest;
import io.kubernetes.client.openapi.models.V1ObjectMeta;
import io.kubernetes.client.util.ClientBuilder;
import io.kubernetes.client.util.credentials.AccessTokenAuthentication;
/**
 * Demo of managing Prometheus Operator {@code PrometheusRule} custom resources with the
 * Kubernetes Java client: list the existing rules, create a disk-usage alert rule in the
 * {@code kube-prom} namespace, and delete that rule again.
 *
 * <p>Connection settings are read from the environment so that no credential lives in source:
 * <ul>
 *   <li>{@code K8S_TOKEN} - service-account bearer token (required)</li>
 *   <li>{@code K8S_API_SERVER} - API server URL (optional, defaults to
 *       {@code https://10.0.2.11:6443})</li>
 * </ul>
 */
public class CRDdemo {

    /** API group of the PrometheusRule custom resource. */
    private static final String GROUP = "monitoring.coreos.com";
    /** API version of the PrometheusRule custom resource. */
    private static final String VERSION = "v1";
    /** Lower-case plural resource name used in the REST path. */
    private static final String PLURAL = "prometheusrules";
    /** Namespace the kube-prometheus-stack release was installed into. */
    private static final String NAMESPACE = "kube-prom";
    /** Name of the rule object this demo creates and then deletes. */
    private static final String RULE_NAME = "custom-rule2";
    /** Value for the API's {@code pretty} query parameter. */
    private static final String PRETTY = "true";

    /** Environment variable holding the bearer token. */
    private static final String TOKEN_ENV = "K8S_TOKEN";
    /** Environment variable that may override the API server URL. */
    private static final String API_SERVER_ENV = "K8S_API_SERVER";
    /** API server URL used when {@link #API_SERVER_ENV} is not set. */
    private static final String DEFAULT_API_SERVER = "https://10.0.2.11:6443";

    /** Shared mapper configured for indented JSON output. */
    private static final ObjectMapper PRETTY_MAPPER =
            new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT);

    /**
     * Runs the list / create / delete sequence against the configured cluster.
     * Each step handles its own errors, so a failure in one step does not stop the next.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String token = System.getenv(TOKEN_ENV);
        if (token == null || token.trim().isEmpty()) {
            System.err.println("Environment variable " + TOKEN_ENV
                    + " must contain a service-account bearer token");
            return;
        }
        String apiServer = System.getenv(API_SERVER_ENV);
        if (apiServer == null || apiServer.trim().isEmpty()) {
            apiServer = DEFAULT_API_SERVER;
        }

        // NOTE(review): TLS verification is disabled here; acceptable for a lab cluster with a
        // self-signed certificate, but should be enabled with a proper CA for anything else.
        ApiClient client = new ClientBuilder().setBasePath(apiServer.trim())
                .setVerifyingSsl(false)
                .setAuthentication(new AccessTokenAuthentication(token.trim()))
                .build();
        Configuration.setDefaultApiClient(client);
        System.out.println(client);

        CustomObjectsApi apiInstance = new CustomObjectsApi(client);
        listRules(apiInstance);
        createRule(apiInstance);
        deleteRule(apiInstance);
    }

    /** Prints every PrometheusRule cluster-wide, then those in {@link #NAMESPACE}. */
    private static void listRules(CustomObjectsApi apiInstance) {
        try {
            Object clusterWide = apiInstance.listClusterCustomObject(GROUP, VERSION, PLURAL).execute();
            System.out.println(JSON.toJSON(clusterWide));
            System.out.println("-----------------------------");
            // The third argument is the namespace (not the "pretty" flag).
            Object namespaced =
                    apiInstance.listNamespacedCustomObject(GROUP, VERSION, NAMESPACE, PLURAL).execute();
            System.out.println(namespaced);
            System.out.println("-----------------------------");
        } catch (ApiException e) {
            reportApiException("listClusterCustomObject/listNamespacedCustomObject", e);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Creates the disk-usage PrometheusRule in {@link #NAMESPACE} and prints the API response. */
    private static void createRule(CustomObjectsApi apiInstance) {
        try {
            Object result = apiInstance
                    .createNamespacedCustomObject(GROUP, VERSION, NAMESPACE, PLURAL, buildDiskUsageRule())
                    .pretty(PRETTY)
                    .execute();
            System.out.println(JSON.toJSON(result));
        } catch (ApiException e) {
            reportApiException("createNamespacedCustomObject", e);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Deletes the rule named {@link #RULE_NAME} from {@link #NAMESPACE} and prints the API response. */
    private static void deleteRule(CustomObjectsApi apiInstance) {
        try {
            Object result = apiInstance
                    .deleteNamespacedCustomObject(GROUP, VERSION, NAMESPACE, PLURAL, RULE_NAME)
                    .execute();
            System.out.println(JSON.toJSON(result));
        } catch (ApiException e) {
            reportApiException("deleteNamespacedCustomObject", e);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the PrometheusRule object: one group named {@code disk} holding a single
     * {@code diskFree} alert that fires when filesystem usage stays above 80% for one minute.
     */
    private static V1PrometheusRule buildDiskUsageRule() {
        // Labels copied from the chart's own PrometheusRule objects. The release name
        // "kube-promethues-stack" is spelled exactly as it was given to `helm install`;
        // if the labels differ from the chart defaults the custom rule is not loaded.
        Map<String, String> metadataLabels = new HashMap<String, String>();
        metadataLabels.put("app", "kube-prometheus-stack");
        metadataLabels.put("app.kubernetes.io/instance", "kube-promethues-stack");
        metadataLabels.put("app.kubernetes.io/managed-by", "Helm");
        metadataLabels.put("app.kubernetes.io/part-of", "kube-prometheus-stack");
        metadataLabels.put("app.kubernetes.io/version", "65.1.1");
        metadataLabels.put("chart", "kube-prometheus-stack-65.1.1");
        metadataLabels.put("heritage", "Helm");
        metadataLabels.put("release", "kube-promethues-stack");

        V1ObjectMeta metadata = new V1ObjectMeta();
        metadata.setLabels(metadataLabels);
        metadata.setName(RULE_NAME);
        metadata.setNamespace(NAMESPACE);

        // Annotation values are Prometheus alert templates; they are sent to the cluster verbatim.
        Map<String, String> annotations = new HashMap<String, String>();
        annotations.put("value", "{{$value}}");
        annotations.put("summary", "{{ $labels.job }} 项目实例 {{ $labels.instance }} 磁盘使用率大于 80%");
        annotations.put("description", "{{ $labels.instance }} {{ $labels.mountpoint }} 磁盘使用率大于80% (当前的值: {{ $value }}%),请及时处理");

        Map<String, String> alertLabels = new HashMap<String, String>();
        alertLabels.put("level", "disaster");
        alertLabels.put("severity", "warning");

        V1PrometheusRuleSpecRules alert = new V1PrometheusRuleSpecRules();
        alert.setAlert("diskFree");
        alert.setAnnotations(annotations);
        alert.setExpr("(1-(node_filesystem_free_bytes{fstype=~\"ext4|xfs\",mountpoint!=\"/boot\"} / node_filesystem_size_bytes{fstype=~\"ext4|xfs\",mountpoint!=\"/boot\"}) )*100 > 80");
        alert.setFor("1m");
        alert.setLabels(alertLabels);

        List<V1PrometheusRuleSpecRules> alerts = new ArrayList<V1PrometheusRuleSpecRules>();
        alerts.add(alert);

        V1PrometheusRuleSpecGroups diskGroup = new V1PrometheusRuleSpecGroups();
        diskGroup.setName("disk");
        diskGroup.setRules(alerts);

        List<V1PrometheusRuleSpecGroups> ruleGroups = new ArrayList<V1PrometheusRuleSpecGroups>();
        ruleGroups.add(diskGroup);

        V1PrometheusRuleSpec spec = new V1PrometheusRuleSpec();
        spec.setGroups(ruleGroups);

        V1PrometheusRule rule = new V1PrometheusRule();
        rule.setKind("PrometheusRule");
        rule.setApiVersion("monitoring.coreos.com/v1");
        rule.setMetadata(metadata);
        rule.setSpec(spec);
        return rule;
    }

    /** Writes the status code, response body, headers and stack trace of a failed API call to stderr. */
    private static void reportApiException(String operation, ApiException e) {
        System.err.println("Exception when calling CustomObjectsApi#" + operation);
        System.err.println("Status code: " + e.getCode());
        System.err.println("Reason: " + e.getResponseBody());
        System.err.println("Response headers: " + e.getResponseHeaders());
        e.printStackTrace();
    }

    /**
     * Reads a YAML file and renders its content as indented JSON.
     *
     * @param filename path of the YAML file to read
     * @return the JSON text for the document, or {@code null} if the file could not be read or
     *         converted (the exception is printed to stderr)
     */
    public static String yamlToJson(String filename) {
        // try-with-resources so the file handle is released even when parsing fails
        try (FileInputStream input = new FileInputStream(filename)) {
            // NOTE(review): Yaml.load on files from an untrusted source may be unsafe depending
            // on the SnakeYAML version in use; confirm before feeding it external input.
            Yaml yaml = new Yaml();
            // The document root may be a mapping or a list, so keep it as Object.
            Object data = yaml.load(input);
            return PRETTY_MAPPER.writeValueAsString(data);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }
}
注意需要使用以下命令查看kube-prometheus-stack内部自定义的标签
kubectl describe prometheusrules.monitoring.coreos.com kube-promethues-stack-kube-prometheus -n kube-prom
代码这部分的标签来源,是通过以上命令查看的(如果标签跟默认的不相同,加载不到自定义规则)
labelsMap.put("app", "kube-prometheus-stack");
labelsMap.put("app.kubernetes.io/instance", "kube-promethues-stack");
labelsMap.put("app.kubernetes.io/managed-by", "Helm");
labelsMap.put("app.kubernetes.io/part-of", "kube-prometheus-stack");
labelsMap.put("app.kubernetes.io/version", "65.1.1");
labelsMap.put("chart", "kube-prometheus-stack-65.1.1");
labelsMap.put("heritage", "Helm");
labelsMap.put("release", "kube-promethues-stack");
参考:
https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
https://cloud-atlas.readthedocs.io/zh-cn/latest/kubernetes/monitor/prometheus/helm3_prometheus_grafana.html
https://www.cnblogs.com/liugp/p/16444580.html
https://cloud-atlas.readthedocs.io/zh-cn/latest/kubernetes/monitor/prometheus/prometheus_rules/kube-prometheus-stack_alert_config.html
https://gitee.com/hadoop-bigdata/kube-prometheus