From f48624e097edbc00a0f53651fdc49ac66fcb5149 Mon Sep 17 00:00:00 2001
From: joyceliu
Date: Wed, 12 Jun 2024 13:28:34 +0800
Subject: [PATCH] feat: add etcd io precheck.

Signed-off-by: joyceliu
---
 builtin/roles/install/etcd/defaults/main.yaml |   2 +
 builtin/roles/install/etcd/files/etcd.service |   2 +
 .../install/etcd/tasks/install_etcd.yaml      |  15 +++
 .../roles/precheck/env_check/tasks/cri.yaml   |  16 +++
 .../roles/precheck/env_check/tasks/etcd.yaml  |  47 ++++++++
 .../roles/precheck/env_check/tasks/main.yaml  | 108 +-----------------
 .../precheck/env_check/tasks/network.yaml     |  22 ++++
 .../roles/precheck/env_check/tasks/nfs.yaml   |   6 +
 .../roles/precheck/env_check/tasks/os.yaml    |  35 ++++++
 pkg/converter/tmpl/filter_extension.go        |  21 ++++
 pkg/converter/tmpl/filter_extension_test.go   |  11 ++
 pkg/converter/tmpl/template_test.go           |   8 +-
 pkg/executor/executor.go                      |  33 ++++--
 13 files changed, 211 insertions(+), 115 deletions(-)
 create mode 100644 builtin/roles/precheck/env_check/tasks/cri.yaml
 create mode 100644 builtin/roles/precheck/env_check/tasks/etcd.yaml
 create mode 100644 builtin/roles/precheck/env_check/tasks/network.yaml
 create mode 100644 builtin/roles/precheck/env_check/tasks/nfs.yaml
 create mode 100644 builtin/roles/precheck/env_check/tasks/os.yaml

diff --git a/builtin/roles/install/etcd/defaults/main.yaml b/builtin/roles/install/etcd/defaults/main.yaml
index b331ca3d..703aa936 100644
--- a/builtin/roles/install/etcd/defaults/main.yaml
+++ b/builtin/roles/install/etcd/defaults/main.yaml
@@ -23,3 +23,5 @@ etcd:
     keep_backup_number: 5
 #    etcd_backup_script: /usr/local/bin/kube-scripts/backup-etcd.sh
     on_calendar: "*-*-* *:00/30:00"
+  performance: false
+  traffic_priority: false
diff --git a/builtin/roles/install/etcd/files/etcd.service b/builtin/roles/install/etcd/files/etcd.service
index d26a6958..178c3e24 100644
--- a/builtin/roles/install/etcd/files/etcd.service
+++ b/builtin/roles/install/etcd/files/etcd.service
@@ -5,6 +5,8 @@ After=network.target
 [Service]
 User=root
 Type=notify
+Nice=-20
+OOMScoreAdjust=-1000
 EnvironmentFile=/etc/etcd.env
 ExecStart=/usr/local/bin/etcd
 NotifyAccess=all
diff --git a/builtin/roles/install/etcd/tasks/install_etcd.yaml b/builtin/roles/install/etcd/tasks/install_etcd.yaml
index f25ab7cf..a8072fb0 100644
--- a/builtin/roles/install/etcd/tasks/install_etcd.yaml
+++ b/builtin/roles/install/etcd/tasks/install_etcd.yaml
@@ -34,5 +34,20 @@
     src: "etcd.service"
     dest: "/etc/systemd/system/etcd.service"
 
+# refer: https://etcd.io/docs/v3.5/tuning/
+- name: Set cpu to performance
+  command: |
+    echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
+  when: etcd.performance
+
+- name: Set Traffic Priority
+  command: |
+    tc qdisc add dev eth0 root handle 1: prio bands 3
+    tc filter add dev eth0 parent 1: protocol ip prio 1 u32 match ip sport 2380 0xffff flowid 1:1
+    tc filter add dev eth0 parent 1: protocol ip prio 1 u32 match ip dport 2380 0xffff flowid 1:1
+    tc filter add dev eth0 parent 1: protocol ip prio 2 u32 match ip sport 2379 0xffff flowid 1:1
+    tc filter add dev eth0 parent 1: protocol ip prio 2 u32 match ip dport 2379 0xffff flowid 1:1
+  when: etcd.traffic_priority
+
 - name: Start etcd service
   command: systemctl daemon-reload && systemctl start etcd && systemctl enable etcd
diff --git a/builtin/roles/precheck/env_check/tasks/cri.yaml b/builtin/roles/precheck/env_check/tasks/cri.yaml
new file mode 100644
index 00000000..3d1f0b94
--- /dev/null
+++ b/builtin/roles/precheck/env_check/tasks/cri.yaml
@@ -0,0 +1,16 @@
+---
+- name: Stop if container manager is not docker or containerd
+  assert:
+    that: cri.container_manager in cluster_require.require_container_manager
+    fail_msg: "The container manager:{{ cri.container_manager }}, must be docker or containerd"
+  run_once: true
+  when: cri.container_manager | defined
+
+- name: Ensure minimum containerd version
+  assert:
+    that: containerd_version | version:'>={{cluster_require.containerd_min_version_required}}'
+    fail_msg: "containerd_version is too low. Minimum version {{ cluster_require.containerd_min_version_required }}"
+  run_once: true
+  when:
+    - not containerd_version in cluster_require.require_containerd_version
+    - cri.container_manager == 'containerd'
diff --git a/builtin/roles/precheck/env_check/tasks/etcd.yaml b/builtin/roles/precheck/env_check/tasks/etcd.yaml
new file mode 100644
index 00000000..7e428109
--- /dev/null
+++ b/builtin/roles/precheck/env_check/tasks/etcd.yaml
@@ -0,0 +1,47 @@
+---
+- name: Stop if etcd deployment type is not internal or external
+  assert:
+    that: kubernetes.etcd.deployment_type in cluster_require.require_etcd_deployment_type
+    fail_msg: "The etcd deployment type, 'kubernetes.etcd.deployment_type', must be internal or external"
+  run_once: true
+  when: kubernetes.etcd.deployment_type | defined
+
+- name: Stop if etcd group is empty in external etcd mode
+  assert:
+    that: "'etcd' in groups"
+    fail_msg: "Group 'etcd' cannot be empty in external etcd mode"
+  run_once: true
+  when:
+    - kubernetes.etcd.deployment_type == "external"
+
+- name: Stop if even number of etcd hosts
+  assert:
+    that: not groups.etcd | length | divisibleby:2
+  when:
+    - inventory_name in groups['etcd']
+
+## https://cwiki.yunify.com/pages/viewpage.action?pageId=145920824
+- name: Check dev io for etcd
+  when:
+    - inventory_name in groups['etcd']
+  block:
+    - name: Check fio is exist
+      ignore_errors: true
+      command: fio --version
+      register: fio_install_version
+    - name: Test dev io by fio
+      when: fio_install_version.stderr == ""
+      block:
+        - name: Get fio result
+          command: |
+            mkdir -p /tmp/kubekey/etcd/test-data
+            fio --rw=write --ioengine=sync --fdatasync=1 --directory=/tmp/kubekey/etcd/test-data --size=22m --bs=2300 --name=mytest --output-format=json
+          register: fio_result
+        - name: Check fio result
+          assert:
+            that: fio_result.stdout.jobs|first|get:'sync'|get:'lat_ns'|get:'percentile'|get:'90.000000' <= 10000000
+            fail_msg: "etcd_disk_wal_fsync_duration_seconds: {{ fio_result.stdout.jobs|first|get:'sync'|get:'lat_ns'|get:'percentile'|get:'90.000000' }}ns is more than 10000000ns"
+      always:
+        - name: Clean test data dir
+          command: rm -rf /tmp/kubekey/etcd/test-data
+
diff --git a/builtin/roles/precheck/env_check/tasks/main.yaml b/builtin/roles/precheck/env_check/tasks/main.yaml
index 3784388d..db0735db 100644
--- a/builtin/roles/precheck/env_check/tasks/main.yaml
+++ b/builtin/roles/precheck/env_check/tasks/main.yaml
@@ -1,31 +1,4 @@
 ---
-- name: Stop if etcd deployment type is not internal or external
-  assert:
-    that: kubernetes.etcd.deployment_type in cluster_require.require_etcd_deployment_type
-    fail_msg: "The etcd deployment type, 'kubernetes.etcd.deployment_type', must be internal or external"
-  run_once: true
-  when: kubernetes.etcd.deployment_type | defined
-
-- name: Stop if etcd group is empty in internal etcd mode
-  assert:
-    that: "'etcd' in groups"
-    fail_msg: "Group 'etcd' cannot be empty in external etcd mode"
-  run_once: true
-  when:
-    - kubernetes.etcd.deployment_type == "external"
-
-- name: Stop if the os does not support
-  assert:
-    that: (cluster_require.allow_unsupported_distribution_setup) or (os.release.ID in cluster_require.supported_os_distributions)
-    fail_msg: "{{ os.release.ID }} is not a known OS"
-
-- name: Stop if unknown network plugin
-  assert:
-    that: kubernetes.kube_network_plugin in cluster_require.require_network_plugin
-    fail_msg: "{{ kubernetes.kube_network_plugin }} is not supported"
-  when:
-    - kubernetes.kube_network_plugin | defined
-
 - name: Stop if unsupported version of Kubernetes
   assert:
     that: kube_version | version:'>={{ cluster_require.kube_version_min_required }}'
@@ -33,83 +6,12 @@
   when:
     - kube_version | defined
 
-- name: Stop if even number of etcd hosts
-  assert:
-    that: not groups.etcd | length | divisibleby:2
-  when:
-    - inventory_name in groups['etcd']
+- include_tasks: etcd.yaml
 
-- name: Stop if memory is too small for masters
-  assert:
-    that: process.memInfo.MemTotal | cut:' kB' >= cluster_require.minimal_master_memory_mb
-  when:
-    - inventory_name in groups['kube_control_plane']
+- include_tasks: os.yaml
 
-- name: Stop if memory is too small for nodes
-  assert:
-    that: process.memInfo.MemTotal | cut:' kB' >= cluster_require.minimal_node_memory_mb
-  when:
-    - inventory_name in groups['kube_worker']
+- include_tasks: network.yaml
 
-# This assertion will fail on the safe side: One can indeed schedule more pods
-# on a node than the CIDR-range has space for when additional pods use the host
-# network namespace. It is impossible to ascertain the number of such pods at
-# provisioning time, so to establish a guarantee, we factor these out.
-# NOTICE: the check blatantly ignores the inet6-case
-- name: Guarantee that enough network address space is available for all pods
-  assert:
-    that: "(kubernetes.kubelet.max_pods | integer) <= (2 | pow: {{ 32 - kubernetes.controller_manager.kube_network_node_prefix | integer }} - 2)"
-    fail_msg: "Do not schedule more pods on a node than inet addresses are available."
-  when:
-    - inventory_name in groups['k8s_cluster']
-    - kubernetes.controller_manager.kube_network_node_prefix | defined
-    - kubernetes.kube_network_plugin != 'calico'
+- include_tasks: cri.yaml
 
-#- name: Stop if access_ip is not pingable
-#  command: ping -c1 {{ access_ip }}
-#  when:
-#    - access_ip | defined
-#    - ping_access_ip
-#  changed_when: false
-
-- name: Stop if kernel version is too low
-  assert:
-    that: os.kernel_version | split:'-' | first | version:'>=4.9.17'
-  when:
-    - kubernetes.kube_network_plugin == 'cilium'
-#    - kubernetes.kube_network_plugin == 'cilium' or (cilium_deploy_additionally | default:false)
-
-- name: Stop if bad hostname
-  vars:
-    regex: '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$'
-  assert:
-    that: inventory_name | match:regex
-    fail_msg: "Hostname must consist of lower case alphanumeric characters, '.' or '-', and must start and end with an alphanumeric character"
-
-- name: Stop if container manager is not docker or containerd
-  assert:
-    that: cri.container_manager in cluster_require.require_container_manager
-    fail_msg: "The container manager:{{ cri.container_manager }}, must be docker or containerd"
-  run_once: true
-  when: cri.container_manager | defined
-
-- name: Ensure minimum containerd version
-  assert:
-    that: containerd_version | version:'>={{cluster_require.containerd_min_version_required}}'
-    fail_msg: "containerd_version is too low. Minimum version {{ cluster_require.containerd_min_version_required }}"
-  run_once: true
-  when:
-    - not containerd_version in cluster_require.require_containerd_version
-    - cri.container_manager == 'containerd'
-
-- name: Check os if supported
-  assert:
-    that: os.architecture in cluster_require.supported_architectures.amd64 or os.architecture in cluster_require.supported_architectures.arm64
-    success_msg: "{% if (os.architecture in cluster_require.supported_architectures.amd64) %}amd64{% else %}arm64{% endif %}"
-  register: binary_type
-
-- name: Stop if nfs server is not be one
-  assert:
-    that: groups['nfs'] | length == 1
-    fail_msg: "Only one nfs server is supported"
-  when: groups['nfs'] | length > 0
+- include_tasks: nfs.yaml
diff --git a/builtin/roles/precheck/env_check/tasks/network.yaml b/builtin/roles/precheck/env_check/tasks/network.yaml
new file mode 100644
index 00000000..52b4ebe1
--- /dev/null
+++ b/builtin/roles/precheck/env_check/tasks/network.yaml
@@ -0,0 +1,22 @@
+---
+- name: Stop if unknown network plugin
+  assert:
+    that: kubernetes.kube_network_plugin in cluster_require.require_network_plugin
+    fail_msg: "{{ kubernetes.kube_network_plugin }} is not supported"
+  when:
+    - kubernetes.kube_network_plugin | defined
+
+# This assertion will fail on the safe side: One can indeed schedule more pods
+# on a node than the CIDR-range has space for when additional pods use the host
+# network namespace. It is impossible to ascertain the number of such pods at
+# provisioning time, so to establish a guarantee, we factor these out.
+# NOTICE: the check blatantly ignores the inet6-case
+- name: Guarantee that enough network address space is available for all pods
+  assert:
+    that: "(kubernetes.kubelet.max_pods | integer) <= (2 | pow: {{ 32 - kubernetes.controller_manager.kube_network_node_prefix | integer }} - 2)"
+    fail_msg: "Do not schedule more pods on a node than inet addresses are available."
+  when:
+    - inventory_name in groups['k8s_cluster']
+    - kubernetes.controller_manager.kube_network_node_prefix | defined
+    - kubernetes.kube_network_plugin != 'calico'
+
diff --git a/builtin/roles/precheck/env_check/tasks/nfs.yaml b/builtin/roles/precheck/env_check/tasks/nfs.yaml
new file mode 100644
index 00000000..c67438d2
--- /dev/null
+++ b/builtin/roles/precheck/env_check/tasks/nfs.yaml
@@ -0,0 +1,6 @@
+---
+- name: Stop if nfs server is not be one
+  assert:
+    that: groups['nfs'] | length == 1
+    fail_msg: "Only one nfs server is supported"
+  when: groups['nfs'] | length > 0
diff --git a/builtin/roles/precheck/env_check/tasks/os.yaml b/builtin/roles/precheck/env_check/tasks/os.yaml
new file mode 100644
index 00000000..7ae96869
--- /dev/null
+++ b/builtin/roles/precheck/env_check/tasks/os.yaml
@@ -0,0 +1,35 @@
+---
+- name: Stop if bad hostname
+  vars:
+    regex: '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$'
+  assert:
+    that: inventory_name | match:regex
+    fail_msg: "Hostname must consist of lower case alphanumeric characters, '.' or '-', and must start and end with an alphanumeric character"
+
+- name: Stop if the os does not support
+  assert:
+    that: (cluster_require.allow_unsupported_distribution_setup) or (os.release.ID in cluster_require.supported_os_distributions)
+    fail_msg: "{{ os.release.ID }} is not a known OS"
+
+- name: Stop if arch is not supported
+  assert:
+    that: os.architecture in cluster_require.supported_architectures.amd64 or os.architecture in cluster_require.supported_architectures.arm64
+    success_msg: "{% if (os.architecture in cluster_require.supported_architectures.amd64) %}amd64{% else %}arm64{% endif %}"
+    fail_msg: "{{ os.architecture }} is not a supported architecture"
+  register: binary_type
+
+- name: Stop if memory is too small for masters
+  assert:
+    that: process.memInfo.MemTotal | cut:' kB' >= cluster_require.minimal_master_memory_mb
+  when:
+    - inventory_name in groups['kube_control_plane']
+
+- name: Stop if memory is too small for nodes
+  assert:
+    that: process.memInfo.MemTotal | cut:' kB' >= cluster_require.minimal_node_memory_mb
+  when:
+    - inventory_name in groups['kube_worker']
+
+- name: Stop if kernel version is too low
+  assert:
+    that: os.kernel_version | split:'-' | first | version:'>=4.9.17'
diff --git a/pkg/converter/tmpl/filter_extension.go b/pkg/converter/tmpl/filter_extension.go
index 22ee5f3b..06819df5 100644
--- a/pkg/converter/tmpl/filter_extension.go
+++ b/pkg/converter/tmpl/filter_extension.go
@@ -36,6 +36,7 @@ func init() {
 	pongo2.RegisterFilter("to_json", filterToJson)
 	pongo2.RegisterFilter("to_yaml", filterToYaml)
 	pongo2.RegisterFilter("ip_range", filterIpRange)
+	pongo2.RegisterFilter("get", filterGet)
 }
 
 func filterDefined(in *pongo2.Value, param *pongo2.Value) (*pongo2.Value, *pongo2.Error) {
@@ -175,3 +176,23 @@ func filterIpRange(in *pongo2.Value, param *pongo2.Value) (*pongo2.Value, *pongo
 
 	return pongo2.AsValue(ipRange), nil
 }
+
+// filterGet returns the map entry (string param) or element (integer param) of in; a nil value when absent.
+func filterGet(in *pongo2.Value, param *pongo2.Value) (out *pongo2.Value, err *pongo2.Error) {
+	result := pongo2.AsValue(nil) // never hand a nil *pongo2.Value back to the template engine
+	in.Iterate(func(idx, count int, key, value *pongo2.Value) bool {
+		if param.IsInteger() && idx == param.Integer() {
+			result = in.Index(idx)
+			return false
+		}
+		if param.IsString() && value != nil && key.String() == param.String() { // value is nil for non-map input
+			result = pongo2.AsValue(value.Interface())
+			return false
+		}
+		return true
+	}, func() {
+		result = pongo2.AsValue(nil)
+	})
+
+	return result, nil
+}
diff --git a/pkg/converter/tmpl/filter_extension_test.go b/pkg/converter/tmpl/filter_extension_test.go
index e5195563..b4261ca5 100644
--- a/pkg/converter/tmpl/filter_extension_test.go
+++ b/pkg/converter/tmpl/filter_extension_test.go
@@ -141,6 +141,17 @@ func TestFilter(t *testing.T) {
 			},
 			except: "a = 23",
 		},
+		{
+			name:  "get from map",
+			input: "{{ test|get:'a1' }}",
+			ctx: map[string]any{
+				"test": map[string]any{
+					"a1": 10,
+					"a2": "b2",
+				},
+			},
+			except: "10",
+		},
 	}
 
 	for _, tc := range testcases {
diff --git a/pkg/converter/tmpl/template_test.go b/pkg/converter/tmpl/template_test.go
index 3af89c59..6e00c139 100644
--- a/pkg/converter/tmpl/template_test.go
+++ b/pkg/converter/tmpl/template_test.go
@@ -49,10 +49,12 @@ func TestParseBool(t *testing.T) {
 		},
 		{
 			name:      "container string",
-			condition: []string{"test in instr"},
+			condition: []string{"instr[0].test"},
 			variable: pongo2.Context{
-				"test":  "a1",
-				"instr": "vda hjilsa1 sdte",
+				"instr": []pongo2.Context{
+					{"test": true},
+					{"test": false},
+				},
 			},
 			excepted: true,
 		},
diff --git a/pkg/executor/executor.go b/pkg/executor/executor.go
index 8342f7bd..886acc57 100644
--- a/pkg/executor/executor.go
+++ b/pkg/executor/executor.go
@@ -363,11 +363,21 @@ func (e executor) executeTask(ctx context.Context, task *kubekeyv1alpha1.Task, o
 			)
 			defer func() {
 				if task.Spec.Register != "" {
+					var stdoutResult any = stdout
+					var stderrResult any = stderr
+					// expose JSON output as structured data so templates can index into it
+					if err := json.Unmarshal([]byte(stdout), &stdoutResult); err != nil {
+						stdoutResult = stdout // not JSON: keep the raw string
+					}
+					// same best-effort conversion for stderr
+					if err := json.Unmarshal([]byte(stderr), &stderrResult); err != nil {
+						stderrResult = stderr // not JSON: keep the raw string
+					}
 					// set variable to parent location
 					if err := e.variable.Merge(variable.MergeRuntimeVariable(host, map[string]any{
-						task.Spec.Register: map[string]string{
-							"stdout": stdout,
-							"stderr": stderr,
+						task.Spec.Register: map[string]any{
+							"stdout": stdoutResult,
+							"stderr": stderrResult,
 						},
 					})); err != nil {
 						stderr = fmt.Sprintf("register task result to variable error: %v", err)
@@ -375,11 +385,19 @@ func (e executor) executeTask(ctx context.Context, task *kubekeyv1alpha1.Task, o
 					}
 				}
 
-				if stderr != "" {
-					klog.Errorf("[Task %s] run failed: %s", ctrlclient.ObjectKeyFromObject(task), stderr)
+				switch {
+				case stderr != "": // failed
 					bar.Describe(fmt.Sprintf("[%s] failed", h))
+					bar.Finish()
+					klog.Errorf("[Task %s] run failed: %s", ctrlclient.ObjectKeyFromObject(task), stderr)
+				case stdout == "skip": // skip
+					bar.Describe(fmt.Sprintf("[%s] skip", h))
+					bar.Finish()
+				default: // success
+					bar.Describe(fmt.Sprintf("[%s] success", h))
+					bar.Finish()
 				}
-				bar.Finish()
+
 				// fill result
 				dataChan <- kubekeyv1alpha1.TaskHostResult{
 					Host:   host,
@@ -410,7 +428,6 @@ func (e executor) executeTask(ctx context.Context, task *kubekeyv1alpha1.Task, o
 				}
 				if !ok {
 					stdout = "skip"
-					bar.Describe(fmt.Sprintf("[%s] skip", h))
 					return
 				}
 			}
@@ -441,8 +458,6 @@ func (e executor) executeTask(ctx context.Context, task *kubekeyv1alpha1.Task, o
 				}
 				bar.Add(1)
 			}
-
-			bar.Describe(fmt.Sprintf("[%s] success", h))
 		})
 	}
 	go func() {