Pacemaker

From VVCWiki
Revision as of 18:27, 23 April 2011 by Vvc (talk | contribs) (→‎Pacemaker management)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation | Jump to search

Pacemaker repository

Cluster

c20 node ip 10.10.10.20
crossover 10.0.0.1
c21 node ip 10.10.10.21
crossover 10.0.0.2
cl1 virt ip 10.10.10.22
/u00 FS /dev/drbd/by-res/u00
cl2 virt ip 10.10.10.23
/u01 FS /dev/drbd/by-res/u01

Cluster stack

You can use either Heartbeat or Corosync as the cluster stack, but not both: make sure only one of them is enabled at startup.

I suggest changing the default start/stop priorities of the corosync service, so that it starts last (99) and stops first (00):

sed -i -e 's/.*chkconfig:.*/# chkconfig: 345 99 00/' /etc/rc.d/init.d/corosync
chkconfig corosync resetpriorities

Heartbeat

  • /etc/ha.d/ha.cf
# Logging
debug				1
use_logd			false
logfacility			daemon
 
# Misc Options
traditional_compression 	off
compression			bz2
coredumps			true
 
# Communications
udpport				694
bcast				eth1 eth0
node				c20
node				c21
   
# Thresholds (in seconds)
keepalive			1
warntime			6
deadtime			10
initdead			30

pacemaker                     respawn
  • /etc/ha.d/authkeys
auth 1
1 sha1 SecretCode
  • fix permissions
chmod 400 /etc/ha.d/authkeys
  • copy the heartbeat configuration from c20 to c21
[root@c21 ~]# rsync -av c20:/etc/ha.d/ /etc/ha.d/
  • start heartbeat daemon on both nodes
service heartbeat start

Corosync

  • generate secure key
corosync-keygen
  • create config file /etc/corosync/corosync.conf on c20

When you have multiple interfaces, keep their mcastport values at least 2 apart, because corosync uses both port N and port N-1 for each interface (hence 5405 and 5407 below).

compatibility: none

aisexec {
        user:   root
        group:  root
}

service {
        name: pacemaker
        clustername: chepkov
        ver:  0
}

totem {
	version: 2
	token: 5000
	token_retransmits_before_loss_const: 20
	join: 1000
	consensus: 11000
	vsftype: none
	max_messages: 20
	secauth: on
	threads: 0
	clear_node_high_bit: yes
	rrp_mode: passive
	interface {
		ringnumber: 0
		broadcast: yes
		bindnetaddr: 10.0.0.0
		mcastport: 5405
	}
	interface {
		ringnumber: 1
		broadcast: yes
		bindnetaddr: 10.10.10.0
		mcastport: 5407
	}
}

logging {
	fileline: off
	to_stderr: no
	to_syslog: yes
	debug: off
	timestamp: on
}

amf {
	mode: disabled
}

  • copy the corosync configuration (including the generated key) from c20 to c21
[root@c21 ~]# rsync -av c20:/etc/corosync/ /etc/corosync/
  • start corosync daemon on both nodes
service corosync start

Pacemaker config

Set default cluster options

[root@c20 ~]# crm configure
property no-quorum-policy=ignore
property stonith-enabled=false
property default-resource-stickiness=1000
property dc-deadtime=2min
property default-action-timeout=120s
property shutdown-escalation=5min
property cluster-recheck-interval=10min
property start-failure-is-fatal=false
property pe-error-series-max=1000
property pe-input-series-max=1000
property pe-warn-series-max=1000
rsc_defaults failure-timeout=10min
commit
bye

Configure drbd resources

[root@c20 ~]# crm configure
primitive drbd_u00 ocf:linbit:drbd params drbd_resource="u00" \
	op start timeout="240" \
        op monitor interval="59s" role="Master" timeout="30s" \
	op monitor interval="60s" role="Slave" timeout="30s"
primitive drbd_u01 ocf:linbit:drbd params drbd_resource="u01" \
	op start timeout="240" \
        op monitor interval="59s" role="Master" timeout="30s" \
	op monitor interval="60s" role="Slave" timeout="30s"
ms ms_drbd_u00 drbd_u00 \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
ms ms_drbd_u01 drbd_u01 \
        meta master-max="1" master-node-max="1" clone-max="2" clone-node-max="1" notify="true"
commit
bye

Configure filesystems

[root@c20 ~]# crm configure
primitive fs_u00 ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/u00" directory="/u00" fstype="ext3" options="noatime"
primitive fs_u01 ocf:heartbeat:Filesystem \
        params device="/dev/drbd/by-res/u01" directory="/u01" fstype="ext3" options="noatime"
colocation col_u00 inf: fs_u00 ms_drbd_u00:Master
colocation col_u01 inf: fs_u01 ms_drbd_u01:Master
order fs_after_drbd_u00 inf: ms_drbd_u00:promote fs_u00:start
order fs_after_drbd_u01 inf: ms_drbd_u01:promote fs_u01:start
commit
bye

Configure IP

[root@c20 ~]# crm configure
primitive ip_rg0 ocf:heartbeat:IPaddr2 params nic="eth0" ip="10.10.10.22" cidr_netmask="32"
primitive ip_rg1 ocf:heartbeat:IPaddr2 params nic="eth0" ip="10.10.10.23" cidr_netmask="32"
commit
bye

Configure apache

[root@c20 ~]# crm configure
primitive apache_rg0 ocf:heartbeat:apache \
	params configfile="/u00/apache/conf/httpd.conf" \
        statusurl="http://cl1.chepkov.lan/server-status" \
        op monitor interval="2min" \
	meta target-role="stopped"
primitive apache_rg1 ocf:heartbeat:apache \
	params configfile="/u01/apache/conf/httpd.conf" \
        statusurl="http://cl2.chepkov.lan/server-status" \
        op monitor interval="2min" \
	meta target-role="stopped"
commit
bye

Create resource groups and set their preferred locations

[root@c20 ~]# crm configure
group rg0 fs_u00 ip_rg0 apache_rg0
group rg1 fs_u01 ip_rg1 apache_rg1
location rg0_on_c20 rg0 100: c20.chepkov.lan
location rg1_on_c21 rg1 100: c21.chepkov.lan
commit
bye

Start apache

crm resource start apache_rg0
crm resource start apache_rg1

Define ping constraints

We don't want to run any service on a node that cannot reach the default router.

# crm configure
primitive ping ocf:pacemaker:ping \
  params name="pingd" host_list="10.10.10.250" multiplier="200" timeout="5" \
  op monitor interval="10"

clone connected ping \
        meta globally-unique="false"

location rg0-connected rg0 \
 rule -inf: not_defined pingd or pingd lte 0

location rg1-connected rg1 \
 rule -inf: not_defined pingd or pingd lte 0

commit
bye

Pacemaker management

  • Stop all resources
crm configure property stop-all-resources=true
  • increase the maximum number of lrmd children
 /usr/sbin/lrmadmin -p max-children 10