feat: add etcd io precheck.

Signed-off-by: joyceliu <joyceliu@yunify.com>
joyceliu 2024-06-12 13:28:34 +08:00
parent 7247c2733c
commit f48624e097
13 changed files with 211 additions and 115 deletions

View File

@@ -23,3 +23,5 @@ etcd:
keep_backup_number: 5
# etcd_backup_script: /usr/local/bin/kube-scripts/backup-etcd.sh
on_calendar: "*-*-* *:00/30:00"
performance: false
traffic_priority: false

View File

@@ -5,6 +5,8 @@ After=network.target
[Service]
User=root
Type=notify
Nice=-20
OOMScoreAdjust=-1000
EnvironmentFile=/etc/etcd.env
ExecStart=/usr/local/bin/etcd
NotifyAccess=all

View File

@@ -34,5 +34,20 @@
    src: "etcd.service"
    dest: "/etc/systemd/system/etcd.service"
# refer: https://etcd.io/docs/v3.5/tuning/
- name: Set cpu governor to performance
  command: |
    echo performance | tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
  when: etcd.performance
- name: Set traffic priority
  command: |
    tc qdisc add dev eth0 root handle 1: prio bands 3
    tc filter add dev eth0 parent 1: protocol ip prio 1 u32 match ip sport 2380 0xffff flowid 1:1
    tc filter add dev eth0 parent 1: protocol ip prio 1 u32 match ip dport 2380 0xffff flowid 1:1
    tc filter add dev eth0 parent 1: protocol ip prio 2 u32 match ip sport 2379 0xffff flowid 1:1
    tc filter add dev eth0 parent 1: protocol ip prio 2 u32 match ip dport 2379 0xffff flowid 1:1
  when: etcd.traffic_priority
- name: Start etcd service
  command: systemctl daemon-reload && systemctl start etcd && systemctl enable etcd
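
Note: both tuning steps follow the etcd tuning guide referenced in the comment above; Nice/OOMScoreAdjust in the unit file cover scheduling, while these tasks cover CPU frequency and network queueing. As a quick post-install sanity check, a small hypothetical Go helper (not part of this commit) can read back the same sysfs files the first task writes:

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

func main() {
	// the same sysfs files the playbook task writes "performance" into
	paths, err := filepath.Glob("/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor")
	if err != nil {
		panic(err)
	}
	for _, p := range paths {
		raw, err := os.ReadFile(p)
		if err != nil {
			continue // some CPUs may lack cpufreq support
		}
		fmt.Printf("%s: %s\n", p, strings.TrimSpace(string(raw)))
	}
}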

View File

@@ -0,0 +1,16 @@
---
- name: Stop if container manager is not docker or containerd
  assert:
    that: cri.container_manager in cluster_require.require_container_manager
    fail_msg: "The container manager {{ cri.container_manager }} must be docker or containerd"
  run_once: true
  when: cri.container_manager | defined
- name: Ensure minimum containerd version
  assert:
    that: containerd_version | version:'>={{ cluster_require.containerd_min_version_required }}'
    fail_msg: "containerd_version is too low. Minimum version {{ cluster_require.containerd_min_version_required }}"
  run_once: true
  when:
    - not containerd_version in cluster_require.require_containerd_version
    - cri.container_manager == 'containerd'

View File

@@ -0,0 +1,47 @@
---
- name: Stop if etcd deployment type is not internal or external
  assert:
    that: kubernetes.etcd.deployment_type in cluster_require.require_etcd_deployment_type
    fail_msg: "The etcd deployment type, 'kubernetes.etcd.deployment_type', must be internal or external"
  run_once: true
  when: kubernetes.etcd.deployment_type | defined
- name: Stop if etcd group is empty in external etcd mode
  assert:
    that: "'etcd' in groups"
    fail_msg: "Group 'etcd' cannot be empty in external etcd mode"
  run_once: true
  when:
    - kubernetes.etcd.deployment_type == "external"
- name: Stop if even number of etcd hosts
  assert:
    that: not groups.etcd | length | divisibleby:2
  when:
    - inventory_name in groups['etcd']
## https://cwiki.yunify.com/pages/viewpage.action?pageId=145920824
- name: Check disk io for etcd
  when:
    - inventory_name in groups['etcd']
  block:
    - name: Check if fio exists
      ignore_errors: true
      command: fio --version
      register: fio_install_version
    - name: Test disk io with fio
      when: fio_install_version.stderr == ""
      block:
        - name: Get fio result
          command: |
            mkdir -p /tmp/kubekey/etcd/test-data
            fio --rw=write --ioengine=sync --fdatasync=1 --directory=/tmp/kubekey/etcd/test-data --size=22m --bs=2300 --name=mytest --output-format=json
          register: fio_result
        - name: Check fio result
          assert:
            that: fio_result.stdout.jobs|first|get:'sync'|get:'lat_ns'|get:'percentile'|get:'90.000000' <= 10000000
            fail_msg: "etcd_disk_wal_fsync_duration_seconds: {{ fio_result.stdout.jobs|first|get:'sync'|get:'lat_ns'|get:'percentile'|get:'90.000000' }}ns is more than 10000000ns (10ms)"
      always:
        - name: Clean test data dir
          command: rm -rf /tmp/kubekey/etcd/test-data
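
For reference: fio's --output-format=json reports sync latency under jobs[0].sync.lat_ns.percentile with keys like "90.000000", in nanoseconds, and etcd's tuning guidance wants WAL fsync (fdatasync) latency under roughly 10ms, hence the 10000000ns bound above. A minimal standalone sketch of the same check in Go (struct fields assume fio's JSON layout; the fio.json path is hypothetical):

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// fioResult models only the fields this check needs from fio's JSON output.
type fioResult struct {
	Jobs []struct {
		Sync struct {
			LatNS struct {
				Percentile map[string]float64 `json:"percentile"`
			} `json:"lat_ns"`
		} `json:"sync"`
	} `json:"jobs"`
}

func main() {
	raw, err := os.ReadFile("fio.json") // saved output of the fio command above
	if err != nil {
		panic(err)
	}
	var res fioResult
	if err := json.Unmarshal(raw, &res); err != nil {
		panic(err)
	}
	p90 := res.Jobs[0].Sync.LatNS.Percentile["90.000000"] // nanoseconds
	const limitNS = 10_000_000                            // 10ms, per etcd's fsync guidance
	if p90 > limitNS {
		fmt.Printf("disk too slow for the etcd WAL: p90 fdatasync %.0fns > %dns\n", p90, limitNS)
		os.Exit(1)
	}
	fmt.Printf("disk ok: p90 fdatasync %.0fns\n", p90)
}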

View File

@@ -1,31 +1,4 @@
---
- name: Stop if etcd deployment type is not internal or external
  assert:
    that: kubernetes.etcd.deployment_type in cluster_require.require_etcd_deployment_type
    fail_msg: "The etcd deployment type, 'kubernetes.etcd.deployment_type', must be internal or external"
  run_once: true
  when: kubernetes.etcd.deployment_type | defined
- name: Stop if etcd group is empty in external etcd mode
  assert:
    that: "'etcd' in groups"
    fail_msg: "Group 'etcd' cannot be empty in external etcd mode"
  run_once: true
  when:
    - kubernetes.etcd.deployment_type == "external"
- name: Stop if the OS is not supported
  assert:
    that: (cluster_require.allow_unsupported_distribution_setup) or (os.release.ID in cluster_require.supported_os_distributions)
    fail_msg: "{{ os.release.ID }} is not a known OS"
- name: Stop if unknown network plugin
  assert:
    that: kubernetes.kube_network_plugin in cluster_require.require_network_plugin
    fail_msg: "{{ kubernetes.kube_network_plugin }} is not supported"
  when:
    - kubernetes.kube_network_plugin | defined
- name: Stop if unsupported version of Kubernetes
  assert:
    that: kube_version | version:'>={{ cluster_require.kube_version_min_required }}'
@@ -33,83 +6,12 @@
  when:
    - kube_version | defined
- name: Stop if even number of etcd hosts
  assert:
    that: not groups.etcd | length | divisibleby:2
  when:
    - inventory_name in groups['etcd']
- include_tasks: etcd.yaml
- name: Stop if memory is too small for masters
  assert:
    that: process.memInfo.MemTotal | cut:' kB' >= cluster_require.minimal_master_memory_mb
  when:
    - inventory_name in groups['kube_control_plane']
- include_tasks: os.yaml
- name: Stop if memory is too small for nodes
  assert:
    that: process.memInfo.MemTotal | cut:' kB' >= cluster_require.minimal_node_memory_mb
  when:
    - inventory_name in groups['kube_worker']
- include_tasks: network.yaml
# This assertion will fail on the safe side: One can indeed schedule more pods
# on a node than the CIDR-range has space for when additional pods use the host
# network namespace. It is impossible to ascertain the number of such pods at
# provisioning time, so to establish a guarantee, we factor these out.
# NOTICE: the check blatantly ignores the inet6-case
- name: Guarantee that enough network address space is available for all pods
  assert:
    that: "(kubernetes.kubelet.max_pods | integer) <= (2 | pow: {{ 32 - kubernetes.controller_manager.kube_network_node_prefix | integer }} - 2)"
    fail_msg: "Do not schedule more pods on a node than inet addresses are available."
  when:
    - inventory_name in groups['k8s_cluster']
    - kubernetes.controller_manager.kube_network_node_prefix | defined
    - kubernetes.kube_network_plugin != 'calico'
- include_tasks: cri.yaml
#- name: Stop if access_ip is not pingable
#  command: ping -c1 {{ access_ip }}
#  when:
#    - access_ip | defined
#    - ping_access_ip
#  changed_when: false
- name: Stop if kernel version is too low
  assert:
    that: os.kernel_version | split:'-' | first | version:'>=4.9.17'
  when:
    - kubernetes.kube_network_plugin == 'cilium'
# - kubernetes.kube_network_plugin == 'cilium' or (cilium_deploy_additionally | default:false)
- name: Stop if bad hostname
  vars:
    regex: '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$'
  assert:
    that: inventory_name | match:regex
    fail_msg: "Hostname must consist of lower case alphanumeric characters, '.' or '-', and must start and end with an alphanumeric character"
- name: Stop if container manager is not docker or containerd
  assert:
    that: cri.container_manager in cluster_require.require_container_manager
    fail_msg: "The container manager {{ cri.container_manager }} must be docker or containerd"
  run_once: true
  when: cri.container_manager | defined
- name: Ensure minimum containerd version
  assert:
    that: containerd_version | version:'>={{ cluster_require.containerd_min_version_required }}'
    fail_msg: "containerd_version is too low. Minimum version {{ cluster_require.containerd_min_version_required }}"
  run_once: true
  when:
    - not containerd_version in cluster_require.require_containerd_version
    - cri.container_manager == 'containerd'
- name: Stop if arch is not supported
  assert:
    that: os.architecture in cluster_require.supported_architectures.amd64 or os.architecture in cluster_require.supported_architectures.arm64
    success_msg: "{% if (os.architecture in cluster_require.supported_architectures.amd64) %}amd64{% else %}arm64{% endif %}"
  register: binary_type
- name: Stop if more than one nfs server
  assert:
    that: groups['nfs'] | length == 1
    fail_msg: "Only one nfs server is supported"
  when: groups['nfs'] | length > 0
- include_tasks: nfs.yaml

View File

@@ -0,0 +1,22 @@
---
- name: Stop if unknown network plugin
  assert:
    that: kubernetes.kube_network_plugin in cluster_require.require_network_plugin
    fail_msg: "{{ kubernetes.kube_network_plugin }} is not supported"
  when:
    - kubernetes.kube_network_plugin | defined
# This assertion will fail on the safe side: One can indeed schedule more pods
# on a node than the CIDR-range has space for when additional pods use the host
# network namespace. It is impossible to ascertain the number of such pods at
# provisioning time, so to establish a guarantee, we factor these out.
# NOTICE: the check blatantly ignores the inet6-case
- name: Guarantee that enough network address space is available for all pods
  assert:
    that: "(kubernetes.kubelet.max_pods | integer) <= (2 | pow: {{ 32 - kubernetes.controller_manager.kube_network_node_prefix | integer }} - 2)"
    fail_msg: "Do not schedule more pods on a node than inet addresses are available."
  when:
    - inventory_name in groups['k8s_cluster']
    - kubernetes.controller_manager.kube_network_node_prefix | defined
    - kubernetes.kube_network_plugin != 'calico'
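
As a worked example of the bound above: with kube_network_node_prefix set to 24, a node CIDR holds 2^(32-24) - 2 = 254 usable addresses, so kubelet's max_pods must stay at or below 254. The same arithmetic in Go (the constant values are illustrative defaults, not from this diff):

package main

import "fmt"

func main() {
	const nodePrefix = 24              // kubernetes.controller_manager.kube_network_node_prefix
	const maxPods = 110                // common kubelet default for kubernetes.kubelet.max_pods
	capacity := 1<<(32-nodePrefix) - 2 // drop network and broadcast addresses
	fmt.Printf("addresses per node: %d, max_pods within bound: %v\n", capacity, maxPods <= capacity)
}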

View File

@@ -0,0 +1,6 @@
---
- name: Stop if more than one nfs server
  assert:
    that: groups['nfs'] | length == 1
    fail_msg: "Only one nfs server is supported"
  when: groups['nfs'] | length > 0

View File

@@ -0,0 +1,35 @@
---
- name: Stop if bad hostname
  vars:
    regex: '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$'
  assert:
    that: inventory_name | match:regex
    fail_msg: "Hostname must consist of lower case alphanumeric characters, '.' or '-', and must start and end with an alphanumeric character"
- name: Stop if the OS is not supported
  assert:
    that: (cluster_require.allow_unsupported_distribution_setup) or (os.release.ID in cluster_require.supported_os_distributions)
    fail_msg: "{{ os.release.ID }} is not a known OS"
- name: Stop if arch is not supported
  assert:
    that: os.architecture in cluster_require.supported_architectures.amd64 or os.architecture in cluster_require.supported_architectures.arm64
    success_msg: "{% if (os.architecture in cluster_require.supported_architectures.amd64) %}amd64{% else %}arm64{% endif %}"
    fail_msg: "{{ os.architecture }} is not a supported architecture"
  register: binary_type
- name: Stop if memory is too small for masters
  assert:
    that: process.memInfo.MemTotal | cut:' kB' >= cluster_require.minimal_master_memory_mb
  when:
    - inventory_name in groups['kube_control_plane']
- name: Stop if memory is too small for nodes
  assert:
    that: process.memInfo.MemTotal | cut:' kB' >= cluster_require.minimal_node_memory_mb
  when:
    - inventory_name in groups['kube_worker']
- name: Stop if kernel version is too low
  assert:
    that: os.kernel_version | split:'-' | first | version:'>=4.9.17'

View File

@@ -36,6 +36,7 @@ func init() {
pongo2.RegisterFilter("to_json", filterToJson)
pongo2.RegisterFilter("to_yaml", filterToYaml)
pongo2.RegisterFilter("ip_range", filterIpRange)
pongo2.RegisterFilter("get", filterGet)
}
func filterDefined(in *pongo2.Value, param *pongo2.Value) (*pongo2.Value, *pongo2.Error) {
@@ -175,3 +176,23 @@ func filterIpRange(in *pongo2.Value, param *pongo2.Value) (*pongo2.Value, *pongo
return pongo2.AsValue(ipRange), nil
}
// filterGet returns the element of a map (by string key) or of an array (by integer index)
func filterGet(in *pongo2.Value, param *pongo2.Value) (*pongo2.Value, *pongo2.Error) {
// default to an empty value so a missing key or index does not return a nil *pongo2.Value
result := pongo2.AsValue(nil)
in.Iterate(func(idx, count int, key, value *pongo2.Value) bool {
if param.IsInteger() && idx == param.Integer() {
result = in.Index(idx)
return false // found by index: stop iterating
}
if param.IsString() && key.String() == param.String() {
result = pongo2.AsValue(value.Interface())
return false // found by key: stop iterating
}
return true // keep iterating
}, func() {
// empty collection: result stays the empty value
})
return result, nil
}
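
A hypothetical, self-contained usage sketch of the new filter (re-registering a minimal copy so the snippet runs outside this package; the pongo2 import path may differ from the version the module pins): string parameters look up map keys, integer parameters look up array indexes.

package main

import (
	"fmt"

	"github.com/flosch/pongo2/v6"
)

func main() {
	// register a minimal version of the `get` filter for the demo;
	// in kubekey the init function shown above does this
	pongo2.RegisterFilter("get", func(in, param *pongo2.Value) (*pongo2.Value, *pongo2.Error) {
		result := pongo2.AsValue(nil)
		in.Iterate(func(idx, count int, key, value *pongo2.Value) bool {
			if param.IsInteger() && idx == param.Integer() {
				result = in.Index(idx)
				return false
			}
			if param.IsString() && key.String() == param.String() {
				result = pongo2.AsValue(value.Interface())
				return false
			}
			return true
		}, func() {})
		return result, nil
	})

	tpl := pongo2.Must(pongo2.FromString("{{ m|get:'a1' }} {{ l|get:1 }}"))
	out, err := tpl.Execute(pongo2.Context{
		"m": map[string]any{"a1": 10},
		"l": []string{"x", "y"},
	})
	if err != nil {
		panic(err)
	}
	fmt.Println(out) // 10 y
}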

View File

@@ -141,6 +141,17 @@ func TestFilter(t *testing.T) {
},
except: "a = 23",
},
{
name: "get from map",
input: "{{ test|get:'a1' }}",
ctx: map[string]any{
"test": map[string]any{
"a1": 10,
"a2": "b2",
},
},
except: "10",
},
}
for _, tc := range testcases {

View File

@@ -49,10 +49,12 @@ func TestParseBool(t *testing.T) {
},
{
name: "container string",
condition: []string{"test in instr"},
condition: []string{"instr[0].test"},
variable: pongo2.Context{
"test": "a1",
"instr": "vda hjilsa1 sdte",
"instr": []pongo2.Context{
{"test": true},
{"test": false},
},
},
excepted: true,
},

View File

@@ -363,11 +363,21 @@ func (e executor) executeTask(ctx context.Context, task *kubekeyv1alpha1.Task, o
)
defer func() {
if task.Spec.Register != "" {
var stdoutResult any = stdout
var stderrResult any = stderr
// try to parse stdout as JSON so registered results can be addressed field by field
if err := json.Unmarshal([]byte(stdout), &stdoutResult); err != nil {
// not JSON: stdoutResult keeps the raw string
}
// try to parse stderr as JSON
if err := json.Unmarshal([]byte(stderr), &stderrResult); err != nil {
// not JSON: stderrResult keeps the raw string
}
// set variable to parent location
if err := e.variable.Merge(variable.MergeRuntimeVariable(host, map[string]any{
task.Spec.Register: map[string]string{
"stdout": stdout,
"stderr": stderr,
task.Spec.Register: map[string]any{
"stdout": stdoutResult,
"stderr": stderrResult,
},
})); err != nil {
stderr = fmt.Sprintf("register task result to variable error: %v", err)
@@ -375,11 +385,19 @@ func (e executor) executeTask(ctx context.Context, task *kubekeyv1alpha1.Task, o
}
}
if stderr != "" {
klog.Errorf("[Task %s] run failed: %s", ctrlclient.ObjectKeyFromObject(task), stderr)
switch {
case stderr != "": // failed
bar.Describe(fmt.Sprintf("[%s] failed", h))
bar.Finish()
klog.Errorf("[Task %s] run failed: %s", ctrlclient.ObjectKeyFromObject(task), stderr)
case stdout == "skip": // skip
bar.Describe(fmt.Sprintf("[%s] skip", h))
bar.Finish()
default: // success
bar.Describe(fmt.Sprintf("[%s] success", h))
bar.Finish()
}
bar.Finish()
// fill result
dataChan <- kubekeyv1alpha1.TaskHostResult{
Host: host,
@@ -410,7 +428,6 @@ func (e executor) executeTask(ctx context.Context, task *kubekeyv1alpha1.Task, o
}
if !ok {
stdout = "skip"
bar.Describe(fmt.Sprintf("[%s] skip", h))
return
}
}
@@ -441,8 +458,6 @@ func (e executor) executeTask(ctx context.Context, task *kubekeyv1alpha1.Task, o
}
bar.Add(1)
}
bar.Describe(fmt.Sprintf("[%s] success", h))
})
}
go func() {
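
The register change above is what lets a precheck such as the etcd fio test address command output as structured data (fio_result.stdout.jobs|first|get:'sync'|...). A minimal standalone sketch of the fallback behavior, using only encoding/json:

package main

import (
	"encoding/json"
	"fmt"
)

// registerValue mirrors the fallback above: return parsed JSON when the
// output decodes, otherwise return the raw string unchanged.
func registerValue(raw string) any {
	var parsed any
	if err := json.Unmarshal([]byte(raw), &parsed); err != nil {
		return raw // not JSON: keep the raw string
	}
	return parsed
}

func main() {
	fmt.Println(registerValue(`{"jobs":[{"sync":{}}]}`)) // map[jobs:[map[sync:map[]]]]
	fmt.Println(registerValue("plain text output"))      // plain text output
}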