0
点赞
收藏
分享

微信扫一扫

k8s 健康检查probe,exec方式,docker pause后不能隔离路由

code_balance 2022-01-26 阅读 69

问题

  1. 服务配置的健康检查为脚本的方式
  2. 使用docker pause命令的方式,将容器里面的所有进程挂住
  3. readiness失败,不会将endpoint从service里面隔离出,客户端调用会导致偶现挂住

原因

  1. k8s在这种场景下,exec探针返回的是执行错误(err != nil),而不是探测失败;此时不会认为健康检查是失败的,本次结果会被丢弃,不会记录失败次数,只是不断重试,然后上报event

  2. 代码的实现

    
    // probe runs the probe through pb.runProbeWithRetries and converts the
    // outcome into a results.Result plus an error. Parts of the body are elided
    // ("...") in this excerpt.
    //
    // In the visible failure path it returns results.Failure together with err;
    // err is non-nil when the probe itself errored and nil when the probe ran but
    // did not report Success or Warning.
    func (pb *prober) probe(probeType probeType, pod *v1.Pod, status v1.PodStatus, container v1.Container, containerID kubecontainer.ContainerID) (results.Result, error) {
        ...
        result, output, err := pb.runProbeWithRetries(probeType, probeSpec, pod, status, container, containerID, maxProbeRetries)
        // Either an execution error or any result other than Success/Warning
        // takes the failure path below.
        if err != nil || (result != probe.Success && result != probe.Warning) {
    	// Probe failed in one way or another.
    	if err != nil {
    		    // The probe could not be carried out: log the error and emit a warning event.
    		    klog.V(1).ErrorS(err, "Probe errored", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
    		    pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe errored: %v", probeType, err)
    	    } else { // result != probe.Success
    		    // The probe ran and reported a non-success result: log it with its output.
    		    klog.V(1).InfoS("Probe failed", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "probeResult", result, "output", output)
    		    pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerUnhealthy, "%s probe failed: %s", probeType, output)
    	    }
    	    // err is passed through unchanged, so callers can tell an errored
    	    // probe (err != nil) from a failed one (err == nil).
    	    return results.Failure, err
        }
        if result == probe.Warning {
    	    // A warning still counts as success here, but is surfaced as an event.
    	    pb.recordContainerEvent(pod, &container, v1.EventTypeWarning, events.ContainerProbeWarning, "%s probe warning: %s", probeType, output)
    	    klog.V(3).InfoS("Probe succeeded with a warning", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "output", output)
        } else {
    	    klog.V(3).InfoS("Probe succeeded", "probeType", probeType, "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
        }
        ...
    }
    
    
    
    
    // doProbe runs the worker's probe once and updates the ProberResults metric
    // for the outcome. Parts of the body are elided ("...") in this excerpt.
    //
    // NOTE(review): keepGoing presumably tells the caller whether to keep
    // probing; confirm against the caller, which is not shown here.
    func (w *worker) doProbe() (keepGoing bool) {
    ...
        // If the exec probe fails to execute, probe returns an error; doProbe then
        // returns true and the probe is simply retried.
        result, err := w.probeManager.prober.probe(w.probeType, w.pod, status, w.container, w.containerID)
        if err != nil {
        	// Prober error, throw away the result.
        	return true
        }
    
        // Increment the metric counter that matches the probe result.
        switch result {
        case results.Success:
    	    ProberResults.With(w.proberResultsSuccessfulMetricLabels).Inc()
        case results.Failure:
    	    ProberResults.With(w.proberResultsFailedMetricLabels).Inc()
        default:
    	    ProberResults.With(w.proberResultsUnknownMetricLabels).Inc()
        }
        ...
    }
    
    

解决方式

  1. 如果是7层的,可以通过接口的方式进行验证,使用httpGet的方式进行替代:
    httpGet:
      path: xxx     # 对应的uri
      scheme: HTTPS # HTTP、HTTPS
      port: xxx     # 监听的port
      host: xxx     # 可选,默认为pod ip
    
  2. 如果是4层的,可以使用tcpSocket的方式:
     tcpSocket:
       port: xxx
       host: xxx # 可选参数,默认是pod ip
    
举报

相关推荐

0 条评论