McastServiceImpl.java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.catalina.tribes.membership;
import java.io.IOException;
import java.net.BindException;
import java.net.DatagramPacket;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.MulticastSocket;
import java.net.SocketTimeoutException;
import java.util.Arrays;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.catalina.tribes.Channel;
import org.apache.catalina.tribes.Member;
import org.apache.catalina.tribes.MembershipListener;
import org.apache.catalina.tribes.MessageListener;
import org.apache.catalina.tribes.io.ChannelData;
import org.apache.catalina.tribes.io.XByteBuffer;
import org.apache.catalina.tribes.util.ExecutorFactory;
import org.apache.catalina.tribes.util.StringManager;
import org.apache.juli.logging.Log;
import org.apache.juli.logging.LogFactory;
/**
* A <b>membership</b> implementation using simple multicast.
* This is the representation of a multicast membership service.
* This class is responsible for maintaining a list of active cluster nodes in the cluster.
* If a node fails to send out a heartbeat, the node will be dismissed.
* This is the low level implementation that handles the multicasting sockets.
* Need to fix this, could use java.nio and only need one thread to send and receive, or
* just use a timeout on the receive
*/
public class McastServiceImpl {
private static final Log log = LogFactory.getLog(McastService.class);
protected static final int MAX_PACKET_SIZE = 65535;
protected static final StringManager sm = StringManager.getManager(Constants.Package);
/**
* Internal flag used for the listen thread that listens to the multicasting socket.
*/
protected volatile boolean doRunSender = false;
protected volatile boolean doRunReceiver = false;
protected volatile int startLevel = 0;
/**
* Socket that we intend to listen to
*/
protected MulticastSocket socket;
/**
* The local member that we intend to broad cast over and over again
*/
protected final MemberImpl member;
/**
* The multicast address
*/
protected final InetAddress address;
/**
* The multicast port
*/
protected final int port;
/**
* The time it takes for a member to expire.
*/
protected final long timeToExpiration;
/**
* How often to we send out a broadcast saying we are alive, must be smaller than timeToExpiration
*/
protected final long sendFrequency;
/**
* Reuse the sendPacket, no need to create a new one every time
*/
protected DatagramPacket sendPacket;
/**
* Reuse the receivePacket, no need to create a new one every time
*/
protected DatagramPacket receivePacket;
/**
* The membership, used so that we calculate memberships when they arrive or don't arrive
*/
protected Membership membership;
/**
* The actual listener, for callback when stuff goes down
*/
protected final MembershipListener service;
/**
* The actual listener for broadcast callbacks
*/
protected final MessageListener msgservice;
/**
* Thread to listen for pings
*/
protected ReceiverThread receiver;
/**
* Thread to send pings
*/
protected SenderThread sender;
/**
* Time to live for the multicast packets that are being sent out
*/
protected final int mcastTTL;
/**
* Read timeout on the mcast socket
*/
protected int mcastSoTimeout = -1;
/**
* bind address
*/
protected final InetAddress mcastBindAddress;
/**
* nr of times the system has to fail before a recovery is initiated
*/
protected int recoveryCounter = 10;
/**
* The time the recovery thread sleeps between recovery attempts
*/
protected long recoverySleepTime = 5000;
/**
* Add the ability to turn on/off recovery
*/
protected boolean recoveryEnabled = true;
/**
* Dont interrupt the sender/receiver thread, but pass off to an executor
*/
protected final ExecutorService executor =
ExecutorFactory.newThreadPool(0, 2, 2, TimeUnit.SECONDS);
/**
* disable/enable local loopback message
*/
protected final boolean localLoopbackDisabled;
private Channel channel;
/**
* Create a new mcast service instance.
* @param member - the local member
* @param sendFrequency - the time (ms) in between pings sent out
* @param expireTime - the time (ms) for a member to expire
* @param port - the mcast port
* @param bind - the bind address (not sure this is used yet)
* @param mcastAddress - the mcast address
* @param ttl multicast ttl that will be set on the socket
* @param soTimeout Socket timeout
* @param service - the callback service
* @param msgservice Message listener
* @param localLoopbackDisabled - disable loopbackMode
* @throws IOException Init error
*/
public McastServiceImpl(
MemberImpl member,
long sendFrequency,
long expireTime,
int port,
InetAddress bind,
InetAddress mcastAddress,
int ttl,
int soTimeout,
MembershipListener service,
MessageListener msgservice,
boolean localLoopbackDisabled)
throws IOException {
this.member = member;
this.address = mcastAddress;
this.port = port;
this.mcastSoTimeout = soTimeout;
this.mcastTTL = ttl;
this.mcastBindAddress = bind;
this.timeToExpiration = expireTime;
this.service = service;
this.msgservice = msgservice;
this.sendFrequency = sendFrequency;
this.localLoopbackDisabled = localLoopbackDisabled;
init();
}
public void init() throws IOException {
setupSocket();
sendPacket = new DatagramPacket(new byte[MAX_PACKET_SIZE],MAX_PACKET_SIZE);
sendPacket.setAddress(address);
sendPacket.setPort(port);
receivePacket = new DatagramPacket(new byte[MAX_PACKET_SIZE],MAX_PACKET_SIZE);
receivePacket.setAddress(address);
receivePacket.setPort(port);
member.setCommand(new byte[0]);
if ( membership == null ) {
membership = new Membership(member);
}
}
protected void setupSocket() throws IOException {
if (mcastBindAddress != null) {
try {
log.info(sm.getString("mcastServiceImpl.bind", address, Integer.toString(port)));
socket = new MulticastSocket(new InetSocketAddress(address,port));
} catch (BindException e) {
/*
* On some platforms (e.g. Linux) it is not possible to bind
* to the multicast address. In this case only bind to the
* port.
*/
log.info(sm.getString("mcastServiceImpl.bind.failed"));
socket = new MulticastSocket(port);
}
} else {
socket = new MulticastSocket(port);
}
socket.setLoopbackMode(localLoopbackDisabled); //hint if we want disable loop back(local machine) messages
if (mcastBindAddress != null) {
if(log.isInfoEnabled()) {
log.info(sm.getString("mcastServiceImpl.setInterface", mcastBindAddress));
}
socket.setInterface(mcastBindAddress);
} //end if
//force a so timeout so that we don't block forever
if (mcastSoTimeout <= 0) {
mcastSoTimeout = (int)sendFrequency;
}
if (log.isInfoEnabled()) {
log.info(sm.getString("mcastServiceImpl.setSoTimeout",
Integer.toString(mcastSoTimeout)));
}
socket.setSoTimeout(mcastSoTimeout);
if ( mcastTTL >= 0 ) {
if(log.isInfoEnabled()) {
log.info(sm.getString("mcastServiceImpl.setTTL", Integer.toString(mcastTTL)));
}
socket.setTimeToLive(mcastTTL);
}
}
/**
* Start the service
* @param level 1 starts the receiver, level 2 starts the sender
* @throws IOException if the service fails to start
* @throws IllegalStateException if the service is already started
*/
public synchronized void start(int level) throws IOException {
boolean valid = false;
if ( (level & Channel.MBR_RX_SEQ)==Channel.MBR_RX_SEQ ) {
if ( receiver != null ) {
throw new IllegalStateException(sm.getString("mcastServiceImpl.receive.running"));
}
try {
if ( sender == null ) {
socket.joinGroup(address);
}
}catch (IOException iox) {
log.error(sm.getString("mcastServiceImpl.unable.join"));
throw iox;
}
doRunReceiver = true;
receiver = new ReceiverThread();
receiver.setDaemon(true);
receiver.start();
valid = true;
}
if ( (level & Channel.MBR_TX_SEQ)==Channel.MBR_TX_SEQ ) {
if ( sender != null ) {
throw new IllegalStateException(sm.getString("mcastServiceImpl.send.running"));
}
if ( receiver == null ) {
socket.joinGroup(address);
}
//make sure at least one packet gets out there
send(false);
doRunSender = true;
sender = new SenderThread(sendFrequency);
sender.setDaemon(true);
sender.start();
//we have started the receiver, but not yet waited for membership to establish
valid = true;
}
if (!valid) {
throw new IllegalArgumentException(sm.getString("mcastServiceImpl.invalid.startLevel"));
}
//pause, once or twice
waitForMembers(level);
startLevel = (startLevel | level);
}
private void waitForMembers(int level) {
long memberwait = sendFrequency*2;
if(log.isInfoEnabled()) {
log.info(sm.getString("mcastServiceImpl.waitForMembers.start",
Long.toString(memberwait), Integer.toString(level)));
}
try {Thread.sleep(memberwait);}catch (InterruptedException ignore){}
if(log.isInfoEnabled()) {
log.info(sm.getString("mcastServiceImpl.waitForMembers.done", Integer.toString(level)));
}
}
/**
* Stops the service.
* @param level Stop status
* @return <code>true</code> if the stop is complete
* @throws IOException if the service fails to disconnect from the sockets
*/
public synchronized boolean stop(int level) throws IOException {
boolean valid = false;
if ( (level & Channel.MBR_RX_SEQ)==Channel.MBR_RX_SEQ ) {
valid = true;
doRunReceiver = false;
if ( receiver !=null ) {
receiver.interrupt();
}
receiver = null;
}
if ( (level & Channel.MBR_TX_SEQ)==Channel.MBR_TX_SEQ ) {
valid = true;
doRunSender = false;
if ( sender != null ) {
sender.interrupt();
}
sender = null;
}
if (!valid) {
throw new IllegalArgumentException(sm.getString("mcastServiceImpl.invalid.stopLevel"));
}
startLevel = (startLevel & (~level));
//we're shutting down, send a shutdown message and close the socket
if ( startLevel == 0 ) {
//send a stop message
member.setCommand(Member.SHUTDOWN_PAYLOAD);
send(false);
//leave mcast group
try {socket.leaveGroup(address);}catch ( Exception ignore){}
try {socket.close();}catch ( Exception ignore){}
member.setServiceStartTime(-1);
}
return (startLevel == 0);
}
/**
* Receive a datagram packet, locking wait
* @throws IOException Received failed
*/
public void receive() throws IOException {
boolean checkexpired = true;
try {
socket.receive(receivePacket);
if(receivePacket.getLength() > MAX_PACKET_SIZE) {
log.error(sm.getString("mcastServiceImpl.packet.tooLong",
Integer.toString(receivePacket.getLength())));
} else {
byte[] data = new byte[receivePacket.getLength()];
System.arraycopy(receivePacket.getData(), receivePacket.getOffset(), data, 0, data.length);
if (XByteBuffer.firstIndexOf(data,0,MemberImpl.TRIBES_MBR_BEGIN)==0) {
memberDataReceived(data);
} else {
memberBroadcastsReceived(data);
}
}
} catch (SocketTimeoutException x ) {
//do nothing, this is normal, we don't want to block forever
//since the receive thread is the same thread
//that does membership expiration
}
if (checkexpired) {
checkExpired();
}
}
private void memberDataReceived(byte[] data) {
final Member m = MemberImpl.getMember(data);
if (log.isTraceEnabled()) {
log.trace("Mcast receive ping from member " + m);
}
Runnable t = null;
final Thread currentThread = Thread.currentThread();
if (Arrays.equals(m.getCommand(), Member.SHUTDOWN_PAYLOAD)) {
if (log.isDebugEnabled()) {
log.debug(sm.getString("mcastServiceImpl.memberShutdown", m));
}
membership.removeMember(m);
t = new Runnable() {
@Override
public void run() {
String name = currentThread.getName();
try {
currentThread.setName("Membership-MemberDisappeared.");
service.memberDisappeared(m);
} finally {
currentThread.setName(name);
}
}
};
} else if (membership.memberAlive(m)) {
if (log.isDebugEnabled()) {
log.debug(sm.getString("mcastServiceImpl.memberAdd", m));
}
t = new Runnable() {
@Override
public void run() {
String name = currentThread.getName();
try {
currentThread.setName("Membership-MemberAdded.");
service.memberAdded(m);
} finally {
currentThread.setName(name);
}
}
};
}
if (t != null) {
executor.execute(t);
}
}
private void memberBroadcastsReceived(final byte[] b) {
if (log.isTraceEnabled()) {
log.trace("Mcast received broadcasts.");
}
XByteBuffer buffer = new XByteBuffer(b,true);
if (buffer.countPackages(true)>0) {
int count = buffer.countPackages();
final ChannelData[] data = new ChannelData[count];
for (int i=0; i<count; i++) {
try {
data[i] = buffer.extractPackage(true);
} catch (IllegalStateException ise) {
log.debug(sm.getString("mcastServiceImpl.messageError"), ise);
}
}
Runnable t = new Runnable() {
@Override
public void run() {
Thread currentThread = Thread.currentThread();
String name = currentThread.getName();
try {
currentThread.setName("Membership-MemberAdded.");
for (ChannelData datum : data) {
try {
if (datum != null && !member.equals(datum.getAddress())) {
msgservice.messageReceived(datum);
}
} catch (Throwable t) {
if (t instanceof ThreadDeath) {
throw (ThreadDeath) t;
}
if (t instanceof VirtualMachineError) {
throw (VirtualMachineError) t;
}
log.error(sm.getString("mcastServiceImpl.unableReceive.broadcastMessage"), t);
}
}
} finally {
currentThread.setName(name);
}
}
};
executor.execute(t);
}
}
protected final Object expiredMutex = new Object();
protected void checkExpired() {
synchronized (expiredMutex) {
Member[] expired = membership.expire(timeToExpiration);
for (final Member member : expired) {
if (log.isDebugEnabled()) {
log.debug(sm.getString("mcastServiceImpl.memberExpire", member));
}
try {
Runnable t = new Runnable() {
@Override
public void run() {
Thread currentThread = Thread.currentThread();
String name = currentThread.getName();
try {
currentThread.setName("Membership-MemberExpired.");
service.memberDisappeared(member);
} finally {
currentThread.setName(name);
}
}
};
executor.execute(t);
} catch (Exception x) {
log.error(sm.getString("mcastServiceImpl.memberDisappeared.failed"), x);
}
}
}
}
/**
* Send a ping.
* @param checkexpired <code>true</code> to check for expiration
* @throws IOException Send error
*/
public void send(boolean checkexpired) throws IOException {
send(checkexpired,null);
}
private final Object sendLock = new Object();
public void send(boolean checkexpired, DatagramPacket packet) throws IOException {
checkexpired = (checkexpired && (packet==null));
//ignore if we haven't started the sender
//if ( (startLevel&Channel.MBR_TX_SEQ) != Channel.MBR_TX_SEQ ) return;
if (packet==null) {
member.inc();
if(log.isTraceEnabled()) {
log.trace("Mcast send ping from member " + member);
}
byte[] data = member.getData();
packet = new DatagramPacket(data,data.length);
} else if (log.isTraceEnabled()) {
log.trace("Sending message broadcast "+packet.getLength()+ " bytes from "+ member);
}
packet.setAddress(address);
packet.setPort(port);
//TODO this operation is not thread safe
synchronized (sendLock) {
socket.send(packet);
}
if ( checkexpired ) {
checkExpired();
}
}
public long getServiceStartTime() {
return (member!=null) ? member.getServiceStartTime() : -1l;
}
public int getRecoveryCounter() {
return recoveryCounter;
}
public boolean isRecoveryEnabled() {
return recoveryEnabled;
}
public long getRecoverySleepTime() {
return recoverySleepTime;
}
public Channel getChannel() {
return channel;
}
public void setChannel(Channel channel) {
this.channel = channel;
}
public class ReceiverThread extends Thread {
int errorCounter = 0;
public ReceiverThread() {
super();
String channelName = "";
if (channel.getName() != null) {
channelName = "[" + channel.getName() + "]";
}
setName("Tribes-MembershipReceiver" + channelName);
}
@Override
public void run() {
while ( doRunReceiver ) {
try {
receive();
errorCounter=0;
} catch ( ArrayIndexOutOfBoundsException ax ) {
//we can ignore this, as it means we have an invalid package
//but we will log it to debug
if ( log.isDebugEnabled() ) {
log.debug(sm.getString("mcastServiceImpl.invalidMemberPackage"), ax);
}
} catch ( Exception x ) {
if (errorCounter==0 && doRunReceiver) {
log.warn(sm.getString("mcastServiceImpl.error.receiving"),x);
} else if (log.isDebugEnabled()) {
if (doRunReceiver) {
log.debug(sm.getString("mcastServiceImpl.error.receiving"), x);
} else {
log.warn(sm.getString("mcastServiceImpl.error.receivingNoSleep"), x);
}
}
if (doRunReceiver) {
try {
sleep(500);
} catch (Exception ignore){
// Ignore
}
if ( (++errorCounter)>=recoveryCounter ) {
errorCounter=0;
RecoveryThread.recover(McastServiceImpl.this);
}
}
}
}
}
}//class ReceiverThread
public class SenderThread extends Thread {
final long time;
int errorCounter=0;
public SenderThread(long time) {
this.time = time;
String channelName = "";
if (channel.getName() != null) {
channelName = "[" + channel.getName() + "]";
}
setName("Tribes-MembershipSender" + channelName);
}
@Override
public void run() {
while ( doRunSender ) {
try {
send(true);
errorCounter = 0;
} catch ( Exception x ) {
if (errorCounter==0) {
log.warn(sm.getString("mcastServiceImpl.send.failed"),x);
} else {
log.debug(sm.getString("mcastServiceImpl.send.failed"),x);
}
if ( (++errorCounter)>=recoveryCounter ) {
errorCounter=0;
RecoveryThread.recover(McastServiceImpl.this);
}
}
try {
sleep(time);
} catch (Exception ignore) {
// Ignore
}
}
}
}//class SenderThread
protected static class RecoveryThread extends Thread {
private static final AtomicBoolean running = new AtomicBoolean(false);
public static synchronized void recover(McastServiceImpl parent) {
if (!parent.isRecoveryEnabled()) {
return;
}
if (!running.compareAndSet(false, true)) {
return;
}
Thread t = new RecoveryThread(parent);
String channelName = "";
if (parent.channel.getName() != null) {
channelName = "[" + parent.channel.getName() + "]";
}
t.setName("Tribes-MembershipRecovery" + channelName);
t.setDaemon(true);
t.start();
}
final McastServiceImpl parent;
public RecoveryThread(McastServiceImpl parent) {
this.parent = parent;
}
public boolean stopService() {
try {
parent.stop(Channel.MBR_RX_SEQ | Channel.MBR_TX_SEQ);
return true;
} catch (Exception x) {
log.warn(sm.getString("mcastServiceImpl.recovery.stopFailed"), x);
return false;
}
}
public boolean startService() {
try {
parent.init();
parent.start(Channel.MBR_RX_SEQ | Channel.MBR_TX_SEQ);
return true;
} catch (Exception x) {
log.warn(sm.getString("mcastServiceImpl.recovery.startFailed"), x);
return false;
}
}
@Override
public void run() {
boolean success = false;
int attempt = 0;
try {
while (!success) {
if(log.isInfoEnabled()) {
log.info(sm.getString("mcastServiceImpl.recovery"));
}
if (stopService() & startService()) {
success = true;
if(log.isInfoEnabled()) {
log.info(sm.getString("mcastServiceImpl.recovery.successful"));
}
}
try {
if (!success) {
if(log.isInfoEnabled()) {
log.info(sm.getString("mcastServiceImpl.recovery.failed",
Integer.toString(++attempt),
Long.toString(parent.recoverySleepTime)));
}
sleep(parent.recoverySleepTime);
}
}catch (InterruptedException ignore) {
}
}
}finally {
running.set(false);
}
}
}
public void setRecoveryCounter(int recoveryCounter) {
this.recoveryCounter = recoveryCounter;
}
public void setRecoveryEnabled(boolean recoveryEnabled) {
this.recoveryEnabled = recoveryEnabled;
}
public void setRecoverySleepTime(long recoverySleepTime) {
this.recoverySleepTime = recoverySleepTime;
}
}