-
Notifications
You must be signed in to change notification settings - Fork 69
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Azure Orchestration support via Pulumi
- Loading branch information
Showing
19 changed files
with
1,074 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
inventory.*.yaml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
# Bootstrap play: install python for full ansible support, then append
# low-latency kernel boot parameters to the cloud image's grub defaults.
- hosts: wallaroo-leaders:wallaroo-followers
  # Fact gathering is deferred to the explicit "gather facts" task below,
  # which runs after the raw python install.
  gather_facts: false
  tasks:
    - name: update apt
      # `which python ||` skips the apt call when python is already present
      raw: which python || apt-get update
      become: true
    - name: install python for full ansible support
      raw: which python || apt-get install -y python
      become: true
    - name: gather facts
      action: setup
    - name: create variable
      # Kernel command-line arguments; the cpu ranges and counts are derived
      # from ansible_processor_vcpus. isolcpus is appended only when both
      # system_cpus and isolcpus are defined and isolcpus == "true".
      set_fact:
        extra_grub_args: 'tsc=reliable skew_tick=y pci=pcie_bus_perf pcie_aspm=off pcie_port_pm=off rcupdate.rcu_normal=1 rcutree.kthread_prio=50 workqueue.power_efficient=0 idle=nomwait audit=0 nosoftlockup=0 nohz_full=1-{{ ((ansible_processor_vcpus/2)-1) | int }} rcu_nocbs=1-{{ ((ansible_processor_vcpus/2)-1) | int }} rcu_nocb_poll transparent_hugepage=never workqueue.watchdog_thresh=0 intel_idle.max_cstate=1 maxcpus={{ ((ansible_processor_vcpus/2)+1) | int }} nr_cpus={{ ((ansible_processor_vcpus/2)+1) | int }} possible_cpus={{ ((ansible_processor_vcpus/2)+1) | int }} {{ "isolcpus=1-" + (((ansible_processor_vcpus/2)-1) | int) | string if system_cpus is defined and isolcpus is defined and isolcpus == "true" else "" }}'
    - name: prep for changing boot param
      # On a GRUB_CMDLINE_LINUX_DEFAULT line that does not already end with the
      # current extra_grub_args, strip everything from " tsc=reliable" up to
      # the closing quote so previously appended arguments are removed.
      shell: sed -i -e '/^GRUB_CMDLINE_LINUX_DEFAULT=".* {{ extra_grub_args }}"/! s/\(^GRUB_CMDLINE_LINUX_DEFAULT=".*\) tsc=reliable.*"/\1"/' /etc/default/grub.d/50-cloudimg-settings.cfg
      become: true
    - name: set tsc reliable, max c-state and max cpus as boot param
      # The negative lookbehind makes the regexp match only when the line does
      # not already end with extra_grub_args, so the append happens once.
      lineinfile:
        dest: /etc/default/grub.d/50-cloudimg-settings.cfg
        regexp: '(^GRUB_CMDLINE_LINUX_DEFAULT="[^"]*)(?<!{{ extra_grub_args }})"$'
        line: '\1 {{ extra_grub_args }}"'
        backrefs: true
        state: present
        backup: true
      register: gfile
      become: true
    - name: update grub
      command: update-grub
      become: true
      # Read the registered result's `changed` attribute directly; the old
      # `gfile|changed` form (a test used as a filter) was removed in Ansible 2.9.
      when: gfile.changed
    - name: Gather network driver type
      shell: ethtool -i {{ ansible_default_ipv4.interface }} | grep -i "^driver:" | grep -Po "([^ ]*)$"
      register: network_driver_name
      # best effort: a failure here must not abort the play
      ignore_errors: true
|
||
|
||
# Followers: register the dynamic group and give each host a numbered hostname.
- hosts: wallaroo-followers
  tasks:
    - name: Create a group of all followers for wallaroo
      group_by:
        key: wallaroo-followers
    - name: set hostname
      become: true
      hostname:
        # 1-based position of this host within the wallaroo-followers group
        name: "wallaroo-follower-{{ groups['wallaroo-followers'].index(inventory_hostname) + 1 }}"
|
||
# Leaders: register the dynamic group, compute the ptpd unicast destination
# list and give each host a numbered hostname.
- hosts: wallaroo-leaders
  vars:
    ethernet_interface: "{{ ansible_default_ipv4.interface }}"
  tasks:
    - name: Create a group of all leaders for wallaroo
      group_by:
        key: wallaroo-leaders
    - name: Set IPs for ptpd unicast
      set_fact:
        # Comma-separated IPv4 addresses of every follower, read from the
        # interface named by ethernet_interface; 127.0.0.1 when the
        # wallaroo-followers group is not defined.
        ptpd_destinations: "{% if groups['wallaroo-followers'] is defined %}{% for host in groups['wallaroo-followers'] %}{{ hostvars[host]['ansible_' + ethernet_interface]['ipv4']['address'] }}{% if not loop.last %},{% endif %}{% endfor %}{% else %}127.0.0.1{% endif %}"
    - name: set hostname
      become: true
      hostname:
        # 1-based position of this host within the wallaroo-leaders group
        name: "wallaroo-leader-{{ groups['wallaroo-leaders'].index(inventory_hostname) + 1 }}"
|
||
# All hosts: populate /etc/hosts with every leader and follower, work out
# which disks can be raided, and prepare the system for the raid build.
- hosts: wallaroo-leaders:wallaroo-followers
  tasks:
    - name: "Add follower hostnames to hosts file"
      lineinfile:
        dest: /etc/hosts
        # replaces an existing entry for the same follower name, if any
        regexp: ".*wallaroo-follower-{{ groups['wallaroo-followers'].index(item) + 1 }}$"
        line: "{{ hostvars[item].ansible_default_ipv4.address }} wallaroo-follower-{{ groups['wallaroo-followers'].index(item) + 1 }}"
        state: present
      become: true
      with_inventory_hostnames: wallaroo-followers
    - name: "Add leader hostnames to hosts file"
      lineinfile:
        dest: /etc/hosts
        regexp: ".*wallaroo-leader-{{ groups['wallaroo-leaders'].index(item) + 1 }}$"
        line: "{{ hostvars[item].ansible_default_ipv4.address }} wallaroo-leader-{{ groups['wallaroo-leaders'].index(item) + 1 }}"
        state: present
      become: true
      with_inventory_hostnames: wallaroo-leaders
    - name: Create a group of all hosts for wallaroo
      group_by:
        key: wallaroo-all
    # The next three tasks build disks_list in steps: device names minus the
    # excluded ones, then "/dev/<name> " joined into one string, then that
    # string split back into a list of device paths.
    - name: set variable for disks that can be raided
      set_fact:
        disks: "{{ hostvars[inventory_hostname]['ansible_devices'].keys() | difference(['xvda','sda', 'sr0']) }}"
    - name: set interim string of device paths for disks that can be raided
      set_fact:
        interim_string: "{% for item in disks %}/dev/{{item}} {% endfor %}"
    - name: set list of device paths for disks that can be raided
      set_fact:
        disks_list: "{{ interim_string.split() }}"
    - name: unmount /mnt for raid
      mount:
        name: "/mnt"
        state: absent
        src: ""
        fstype: ""
      become: true
    - name: sysctl max speed change for raid build
      sysctl:
        name: dev.raid.speed_limit_max
        value: 2000000000
        state: present
        sysctl_set: true
        reload: true
      become: true
    - name: sysctl min speed change for raid build
      sysctl:
        name: dev.raid.speed_limit_min
        value: 2000000000
        state: present
        sysctl_set: true
        reload: true
      become: true
    - name: create /data
      file:
        path: /data
        state: directory
        # quoted so the octal mode does not depend on how the YAML parser
        # types a bare leading-zero number
        mode: "0777"
      become: true
|
||
# Apply common configuration to all hosts, and build a software raid on
# /dev/md127 mounted at /data when disks_list is non-empty.
- hosts: wallaroo-all
  vars:
    # force the creation if there are any prompts
    software_raid_create_kwargs: "--run"
    software_raid_devices:
      - device: /dev/md127
        # raid_level / raid_fs may be supplied by the caller; defaults are 0 / ext4
        level: "{{ raid_level if raid_level is defined else '0' }}"
        components: "{{ disks_list }}"
        filesystem_type: "{{ raid_fs if raid_fs is defined else 'ext4' }}"
        # tell mkfs not to use `discard/TRIM` on blocks because aws pre-discards all blocks
        # NOTE(review): this rationale names aws; confirm it also holds on azure.
        mkfs_options: "{{ '' if raid_fs is defined and raid_fs != 'ext4' else '-E nodiscard' }}"
        mount_point: "/data"
        mount_options: "noatime,nodiratime,discard"
        dump: 0
        passno: 0
  roles:
    - role: common
      become: true
    - role: jacoelho.softwareraid
      become: true
      # skip raid creation entirely when no candidate disks were found
      when: disks_list | length > 0
|
||
# Configure and deploy leader servers: ptpd as unicast master, plus docker.
- hosts: wallaroo-leaders
  vars:
    ethernet_interface: "{{ ansible_default_ipv4.interface }}"
    ptpd_role: master
    ptpd_transport: unicast
    swarm_image: swarm
    consul_image: gliderlabs/consul
    docker_users:
      - ubuntu
      - wallaroo
    # quoted so the version stays a string
    docker_dockerpy_version: "1.9.0"
  roles:
    - role: ptpd
      become: true
    - role: docker
      become: true
|
||
# Configure and deploy follower servers: ptpd as unicast slave, plus docker.
- hosts: wallaroo-followers
  vars:
    ethernet_interface: "{{ ansible_default_ipv4.interface }}"
    ptpd_role: slave
    ptpd_transport: unicast
    # IPv4 address of the first host in wallaroo-leaders, read from the
    # interface named by ethernet_interface
    leader_ip: "{{ hostvars[groups['wallaroo-leaders'][0]]['ansible_' + ethernet_interface]['ipv4']['address'] }}"
    swarm_image: swarm
    consul_image: gliderlabs/consul
    docker_users:
      - wallaroo
    # quoted so the version stays a string
    docker_dockerpy_version: "1.9.0"
  roles:
    - role: ptpd
      become: true
    - role: docker
      become: true
|
||
# Final tuning on every host: raise ptpd's scheduling priority and unload
# the jfs and xfs kernel modules.
- hosts: wallaroo-leaders:wallaroo-followers
  tasks:
    - name: change ptpd to be realtime priority
      become: true
      # chrt -f -p 80 <pid>: applied to the pid(s) returned by `pidof ptpd`
      shell: chrt -f -p 80 $(pidof ptpd)
    - name: unload jfs module
      become: true
      modprobe:
        name: jfs
        state: absent
    - name: unload xfs module
      become: true
      modprobe:
        name: xfs
        state: absent
77 changes: 77 additions & 0 deletions
77
orchestration/ansible/playbooks/roles/common/files/create_cpu_shield_azure.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#!/bin/bash
# Create or remove a cset-based cpu shield.
#
# Usage: create_cpu_shield_azure.sh [SYS_CPUS]
#   SYS_CPUS  cpuspec passed to `cset set -c` for the "system" cpuset. Every
#             cpu not in it is placed in a "user" cpuset and existing
#             processes are moved from root into "system".
#             When omitted or empty, any existing "system" and "user" cpusets
#             are destroyed instead.
set -euox pipefail

SYS_CPUS=${1:-}

if [ "${SYS_CPUS}" == "" ]; then
  echo "Disabling cpu isolation if it is enabled."
  cset set -l -r
  # `|| true`: a grep with no match must not abort the script under -e/pipefail
  system=$(cset set -l -r | grep -o '/system' || true)
  user=$(cset set -l -r | grep -o '/user' || true)
  if [ "${system}" != "" ]; then
    cset set --destroy system
  fi
  if [ "${user}" != "" ]; then
    cset set --destroy user
  fi
  cset set -l -r
  echo "Done disabling cpu isolation if it is enabled."
else
  # Skip all work when the existing system set already covers the same cpus.
  old_sys_cpus=$(cset set -l -r system | grep '/system$' | awk '{print $2}' || true)
  if [ "${old_sys_cpus}" == "${SYS_CPUS}" ]; then
    echo "SYS CPUS (${old_sys_cpus}) didn't change. Nothing to do."
    exit
  fi
  echo "Isolating general system processes to cpus '${SYS_CPUS}'."
  echo "Current process map by cpuset:"
  cset set -l -r
  mem_nodes=$(cset set -l -r root | grep '/$' | awk '{print $4}')
  cset set -c "${SYS_CPUS}" -m "${mem_nodes}" -s system
  # -x makes cset report cpus as hex masks; user cpus = all cpus minus system cpus
  sys_cpus=$(cset set -l -r -x system | grep '/system$' | awk '{print $2}')
  all_cpus=$(cset set -l -r -x root | grep '/$' | awk '{print $2}')
  user_cpus=$(( 0x${all_cpus} - 0x${sys_cpus} ))
  mask=0
  num_procs=$(nproc --all)
  user_cpuspec=""
  current_cpuspec_frag=""
  # Convert the user cpu bitmask into a cpuspec of "first-last" ranges.
  # Every cpu bit is inspected, including the last one, so a user range is
  # closed exactly where the mask says it ends.
  for (( bit=0; bit<num_procs; bit++ )); do
    mask=$((1<<bit))
    if [ $((user_cpus&mask)) -eq 0 ]; then
      if [ "${current_cpuspec_frag}" != "" ]; then
        # ${var:+...} adds the separating comma only when a range already exists
        user_cpuspec=${user_cpuspec:+${user_cpuspec},}${current_cpuspec_frag}-$((bit-1))
        current_cpuspec_frag=""
      fi
    elif [ "${current_cpuspec_frag}" == "" ]; then
      current_cpuspec_frag=${bit}
    fi
  done
  # A range still open after the loop ends at the last cpu (bit-1 == num_procs-1).
  if [ "${current_cpuspec_frag}" != "" ]; then
    user_cpuspec=${user_cpuspec:+${user_cpuspec},}${current_cpuspec_frag}-$((bit-1))
  fi
  if [ "${user_cpuspec}" == "" ]; then
    echo "No cpus left for the user cpuset after reserving '${SYS_CPUS}' for system." >&2
    exit 1
  fi
  # The offline/online cycle below is disabled due to azure not allowing the
  # device/resource to be disabled.
  # for (( bit=0; bit<${num_procs}; bit++)); do
  #   mask=$((1<<bit))
  #   if [ $((user_cpus&mask)) -ne 0 ]; then
  #     echo "Temporarily disabling user cpu: ${bit}"
  #     echo 0 > /sys/devices/system/cpu/cpu${bit}/online
  #   fi
  # done
  # sleep 1
  # cat /proc/cpuinfo | grep proc
  # for (( bit=0; bit<${num_procs}; bit++)); do
  #   mask=$((1<<bit))
  #   if [ $((user_cpus&mask)) -ne 0 ]; then
  #     echo "Re-enabling user cpu: ${bit}"
  #     echo 1 > /sys/devices/system/cpu/cpu${bit}/online
  #   fi
  # done
  cset set -c "${user_cpuspec}" -m "${mem_nodes}" -s user
  # move processes (with -k and --threads) from the root set into system
  cset proc -m -k --threads -f root -t system
  echo "Modified process map by cpuset:"
  cset set -l -r
  echo "Done isolating general system processes to cpus '${SYS_CPUS}'."
fi
|
73 changes: 73 additions & 0 deletions
73
orchestration/ansible/playbooks/roles/common/files/kerneltweaks_azure.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#!/bin/bash
# Apply low-latency kernel, network-stack, filesystem and NIC tuning.
# Takes no arguments; writes sysctl values and /proc, /sys entries, then
# adjusts every eth* interface with ethtool.
# The sysctl statements are kept in their original order on purpose.
set -euox pipefail

# kernel tweaks
# misc
echo never > /sys/kernel/mm/transparent_hugepage/enabled # disable transparent hugepages
echo 1000000000 > /proc/sys/vm/nr_overcommit_hugepages # enable hugepages
# keep memory in reserve for when processes request it:
# the smaller of MemTotal/4 and 8000000 (kB)
sysctl -w vm.min_free_kbytes=$(cat /proc/meminfo | grep MemTotal | awk '{print ($2/4 < 8000000) ? $2/4 : 8000000}')
sysctl -w vm.swappiness=0 # change swappiness
sysctl -w vm.zone_reclaim_mode=0 # disable zone reclaim on numa nodes
sysctl -w kernel.sched_migration_cost_ns=5000000
sysctl -w kernel.sched_autogroup_enabled=0
sysctl -w kernel.sched_latency_ns=36000000
sysctl -w kernel.sched_min_granularity_ns=10000000

# networking
sysctl -w net.core.somaxconn=2048
sysctl -w net.core.netdev_max_backlog=30000
sysctl -w net.core.rmem_max=16777216
sysctl -w net.core.wmem_max=16777216
sysctl -w net.ipv4.tcp_wmem='4096 12582912 16777216'
sysctl -w net.ipv4.tcp_rmem='4096 12582912 16777216'
sysctl -w net.ipv4.tcp_max_syn_backlog=8096
sysctl -w net.ipv4.tcp_slow_start_after_idle=0
sysctl -w net.ipv4.tcp_tw_reuse=1
#sysctl -w net.ipv4.ip_local_port_range='10240 65535'
sysctl -w net.ipv4.tcp_abort_on_overflow=1 # maybe
sysctl -w net.ipv4.tcp_mtu_probing=1
sysctl -w net.ipv4.tcp_timestamps=1
sysctl -w net.ipv4.tcp_low_latency=1
sysctl -w net.core.default_qdisc=fq_codel
sysctl -w net.ipv4.tcp_window_scaling=1
sysctl -w net.ipv4.tcp_max_tw_buckets=7200000
sysctl -w net.ipv4.tcp_sack=0
sysctl -w net.ipv4.tcp_fin_timeout=15
sysctl -w net.ipv4.tcp_moderate_rcvbuf=1
sysctl -w net.core.rps_sock_flow_entries=65536

sysctl -w net.core.dev_weight=600
sysctl -w net.core.netdev_budget=600
sysctl -w net.core.netdev_tstamp_prequeue=1
sysctl -w net.ipv4.tcp_congestion_control=dctcp
sysctl -w net.ipv4.tcp_ecn=1

sysctl -w net.ipv4.tcp_fastopen=3

sysctl -w net.core.busy_poll=50 # spend cpu for lower latency
sysctl -w net.core.busy_read=50 # spend cpu for lower latency

# filesystem stuff
# NOTE(review): the kernel treats each *_ratio / *_bytes pair as alternatives,
# so the later write of a pair presumably wins -- confirm the intended values.
sysctl -w vm.dirty_ratio=80 # from 40
sysctl -w vm.dirty_bytes=2147483648 # from 0
sysctl -w vm.dirty_background_bytes=268435456 # from 0
sysctl -w vm.dirty_background_ratio=5 # from 10
sysctl -w vm.dirty_expire_centisecs=12000 # from 3000

# apply changes
# NOTE(review): `sysctl -p` loads values from the sysctl configuration file;
# confirm it does not override the `sysctl -w` settings made above.
sysctl -p # apply changed settings

# from: https://www.ibm.com/developerworks/community/wikis/home?lang=en#!/wiki/W51a7ffcf4dfd_4b40_9d82_446ebc23c550/page/Linux%20on%20Power%20-%20Low%20Latency%20Tuning
# Make sure to set the realtime bandwidth reservation to zero, or even real-time tasks will be asked to step aside for a bit
echo 0 > /proc/sys/kernel/sched_rt_runtime_us

# If a soft limit is set for the maximum realtime priority which is less than the hard limit and needs to be raised, the "ulimit -r" command can do so
ulimit -r 90

# Names of all eth* interfaces. Newer ifconfig versions print "eth0:" with a
# trailing colon, so it is stripped before the name is handed to ethtool.
interfaces=$(ifconfig | grep '^eth' | awk '{sub(/:$/, "", $1); print $1}')
## ethtool based tweaks, applied to each interface separately
for interface in ${interfaces}; do
  ethtool -G "${interface}" rx 4096 tx 4096 || true # forced to always succeed because it fails if the value is already correct
  #ethtool -K eth0 gso off # don't disable because only used when sending large packets and helps offload work from cpu
  ethtool -K "${interface}" gro off # disable because this slows packet delivery up network stack
  #ethtool -K eth0 tso off # don't disable because only used when sending large packets and helps offload work from cpu
  ethtool -C "${interface}" rx-usecs 0 || true # forced to always succeed because it fails if the value is already correct
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.