File ibm-prtm_for_ls22-bnc443830.diff of Package ibmrtpkgs

Date: Mon, 10 Nov 2008 20:25:21 -0800  (21:25 MST)

This is an update to the /etc/init.d/ibm-prtm file. It is part of the
userspace code needed for SMI remediation.

Submitted-by: Keith Mannthey <kmannth@us.ibm.com> 

This ibm-prtmd is much more modular, thanks to Vernon Mauery, and it
supports the LS22.  This is the core userspace code that maps and
reports EDAC ECC errors to the dimm slots. 

Signed-off-by: Keith Mannthey <kmannth@us.ibm.com>

---

Index: ibmrtpkgs-2/ibm-prtm/ibm-prtm
===================================================================
--- ibmrtpkgs-2.orig/ibm-prtm/ibm-prtm
+++ ibmrtpkgs-2/ibm-prtm/ibm-prtm
@@ -1,23 +1,47 @@
 #!/bin/bash  
 #
-# Copyright IBM Corporation © 2008
+# © Copyright IBM Corp. 2008.  All Rights Reserved.
 # Author: Keith Mannthey <kmannth@us.ibm.com>
 #
 # ibm-prtm	This shell script takes care of starting and stopping
 #		ibm-prtmd (IBM Premium Real-Time Mode daemon).
 #
-# chkconfig: 2345 60 40
+### BEGIN INIT INFO
+# Provides: ibm-prtmd
+# Required-Start:
+# Required-Stop:
+# Default-Start: 3 5
+# Default-Stop: 0 1 2 6
+# Description: Start the IBM Premium Real-Time daemon
+### END INIT INFO
+
 # description: Enters/Exits IBM Premium Real-Time Mode. \
 # Enables and Disables IBM Premium Real-Time Mode, as well as starts and \
 # stops the ibm-prtmd daemon.
 
 # Source function library.
-. /etc/init.d/functions
+. /etc/rc.status
 
 RETVAL=0
 prog="ibm-prtmd"
+PIDFILE=/var/run/ibm-prtmd.pid
 NAME="IBM Real-Time HW Daemon"
+get_pid() {
+	local pidfile="$1"	# prints the daemon pid recorded in pidfile $1
+	PID=`cat "$pidfile" 2>/dev/null`
+	if [ -n "$PID" ] && grep -q ibm-prtmd "/proc/$PID/cmdline" >/dev/null 2>&1; then
+		echo $PID
+		return 0
+	fi
+	return 1	# no pidfile, or the pid is not an ibm-prtmd process
+}
 start() {
+	PID=`get_pid $PIDFILE`
+	if [ $? -eq 0 ]; then
+		echo "ibm-prtmd already running (pid $PID)"
+		exit 1
+	fi
+
 	if ! modprobe ibm_rtl >/dev/null 2>&1
 	then 
 		logger -s -t "$NAME" "BIOS Real-Time Mode not supported on this hardware or kernel."
@@ -63,8 +87,8 @@ start() {
 		
 
 	# Start daemons.
-	echo "Starting $prog: "
-	start_daemon /usr/bin/$prog
+	echo -n "Starting $prog: "
+	/usr/bin/$prog
 	RETVAL=$?
 	if [ $RETVAL -ne 0 ]; then
 		logger -s -t "$NAME" "An error has occurred!"
@@ -75,7 +99,7 @@ start() {
 		ipmitool raw 0x3A 0x1A 1
 		rm -f /var/lock/subsys/$prog
 	else
-		logger -s -t "$NAME" "System Management Interrupts have been disable to"
+		logger -s -t "$NAME" "System Management Interrupts have been disabled to"
 		logger -s -t "$NAME" "allow this system to run in Real-Time Mode."
 		touch /var/lock/subsys/$prog
 	fi 
@@ -86,11 +110,19 @@ start() {
 stop() {
 	echo 0 > /sys/devices/system/ibm_rtl/state
 	ipmitool raw 0x3A 0x1A 1
-	logger -s -t "$NAME" "Thermal policies (ie. acoustic mode) which were enabled on systems"
-	logger -s -t "$NAME" "running in Real-Time Mode may not be accepted."
+	logger -s -t "$NAME" "Thermal policies (ie. acoustic mode) which were enabled on"
+	logger -s -t "$NAME" "systems running in Real-Time Mode may not be accepted."
 	logger -s -t "$NAME" "Please set thermal policies on this system again if desired."
-	killproc $prog
+	PID=`get_pid $PIDFILE`
+	if [ $? -eq 0 ]; then
+		kill -9 $PID
+		true
+	else
+		echo "ibm-prtmd is not running"
+		false
+	fi
 	RETVAL=$?
+	rm -f $PIDFILE
 	echo
 	[ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/$prog
 	return $RETVAL
@@ -100,21 +132,26 @@ stop() {
 case "$1" in
   start)
 	start
+	rc_status -v
 	;;
   stop)
 	stop
+	rc_status -v
 	;;
   status)
-	if [ `pidof -x $prog` ]; then
-		echo "$prog is running"
+	PID=`get_pid $PIDFILE`
+	if [ $? -eq 0 ]; then
+		echo "ibm-prtmd running... (pid $PID)"
 	else
-		echo "$prog is stopped"
+		echo "ibm-prtmd is not running"
+		rm -f $PIDFILE
 	fi
-	RETVAL=$?
 	;;
   restart|reload)
 	stop
+	rc_status -v
 	start
+	rc_status -v
 	RETVAL=$?
 	;;
   condrestart)
Index: ibmrtpkgs-2/ibm-prtm/ibm-prtmd
===================================================================
--- ibmrtpkgs-2.orig/ibm-prtm/ibm-prtmd
+++ ibmrtpkgs-2/ibm-prtm/ibm-prtmd
@@ -1,293 +1,495 @@
-#! /usr/bin/python
-#-*- coding: latin-1 -*-#
+#!/usr/bin/python
+# ex: set encoding=utf8 tabstop=4 expandtab shiftwidth=4 softtabstop=4:
 #
-# Copyright IBM Corporation © 2008
+# © Copyright IBM Corp. 2008.  All Rights Reserved.
 # Author: Keith Mannthey <kmannth@us.ibm.com>
+#         Vernon Mauery <vernux@us.ibm.com>
 #
 # This program is free software; you can redistribute it and/or
-#modify it under the terms of the GNU General Public License
-#as published by the Free Software Foundation; either version 2
-#of the License, or (at your option) any later version.
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
 #
-#This program is distributed in the hope that it will be useful,
-#but WITHOUT ANY WARRANTY; without even the implied warranty of
-#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#GNU General Public License for more details.
-
-#You should have received a copy of the GNU General Public License
-#along with this program; if not, write to the Free Software
-#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  
-#02110-1301, USA.
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301, USA.
+
 
-import time
 import os
 import sys
-import threading
 import re
 import syslog
+import signal
+import fcntl
+import termios
+from time import sleep
+
+def die(msg, ret=-1):
+    print msg
+    sys.exit(ret)
+
+class InvalidMachineError(Exception):
+    pass
+
+def detach_tty():
+        childpid = os.fork()
+        if childpid < 0:
+                die("fork failed")
+        elif childpid > 0:
+                sys.exit(0)
+
+        # lose controlling tty
+        try:
+                ttyfd = os.open('/dev/tty', os.O_RDWR)
+                if ttyfd >= 0:
+                        fcntl.ioctl(ttyfd, termios.TIOCNOTTY, 0)
+                        os.close(ttyfd)
+        except OSError:
+                pass
+        os.setpgrp()
+
+        # ignore sighup
+        signal.signal(signal.SIGHUP, signal.SIG_IGN)
+        childpid = os.fork()
+        if childpid < 0:
+                die("fork failed")
+        elif childpid > 0:
+                sys.exit(0)
+
+def daemonize (logfile=None):
+        if os.getppid() != 1:
+                detach_tty()
+        os.close(0)
+
+        # Reopen stdin descriptor on /dev/null */
+        fd = os.open("/dev/null", os.O_RDWR)
+        if fd < 0:       # stdin
+                die("cannot open /dev/null for stdin")
+        if logfile is not None:
+                logfd = os.open(logfile, os.O_CREAT|os.O_WRONLY|os.O_APPEND, 0666)
+                if logfd < 0: # stdout
+                        die("could not open logfile (%s) for stdout" % logfile)
+        else:
+                logfd = 0
+        os.close(1)
+        os.close(2)
+        fd = os.dup(logfd)
+        if fd < 0:
+                die('failed to reopen stdout')
+        fd = os.dup(logfd)
+        if fd < 0:
+                die('failed to reopen stdout')
 
-def print_syslog(message):
-	syslog.syslog(message)
-	print (message)
+        # move to root directory, so we don't prevent filesystem unmounts
+        os.chdir("/");
 
-#Check if on hs21xm.
-def is_hs21():
-	#check for 5000P memory controller
-	if os.popen("lspci | grep \"5000P Chipset Memory\"").readlines() != []:
-		return True
-	else:
-		return False
-
-#assign the correct dimm value to each channel
-def hs21_rework(ch_list):
-	ch_list.sort()
-	#With the channels sorted toplogy always looks like
-	order=4,2,5,7,4,2,5,7,3,1,6,8,3,1,6,8
-	
-	for channel in range(0,len(ch_list)):
-		ch_list[channel]["dimm"] = order[channel]
-
-	return ch_list
-
-#Read and decode dmidecode
-def read_dmi():
-
-	dmi_record = {"dimm": re.compile(r"Locator: (?P<dimm>DIMM\d+)"),
-		      "size": re.compile(r"Size: (?P<size>\d+) MB")}
-
-	dmi_handle = r"Handle [A-F\d]x[A-F\d]{4}\, DMI type \d+\, \d+ bytes\."
-
-	count = 0
-	dmi_list = []
-	for handle in re.split(dmi_handle, os.popen("dmidecode").read()):
-		if "Memory Device\n" in handle[:15]:
-			slot = {"count": count, "enabled": False, "dimm": None, "size": 0}
-			record = handle[15:].split("\n")
-		
-			r = dmi_record["size"].match(record[4][1:])
-			if r:
-				slot["enabled"] = True
-				slot["size"] = int(r.group(1))
-				dmi_list.append(slot);
-				count = count + 1 
-			
-			r = dmi_record["dimm"].match(record[7][1:])
-			if r:
-				slot["dimm"] = int(r.group("dimm")[4:])
-	#Reorder DMI entries to csrow order exposed by EDAC. 
-	return dmi_list
-
-#return the n'th (dimm) dimm in dmi_list
-def get_dmi_dimm(dmi_list, dimm):
-	if dimm < 0 or dimm > len(dmi_list):
-		return -1
-	else:
-		return dmi_list[dimm]["dimm"]	
-
-#return the total amount of memory reported by dmi_list
-def get_dmi_size(dmi_list):
-	size = 0
-	for dimm in dmi_list:
-		size = size +  dimm["size"] 
-	return size	
-
-#return the "real dimm" value for a dimm
-def get_dmi_dimm_size(dmi_list, dimm):
-	if dimm < 0 or dimm > len(dmi_list):
-		return 0
-	else:
-		return dmi_list[dimm]["size"]
+        # set our umask to something reasonable (we hope)
+        os.umask(022);
 
+        return 0
+
+def print_syslog(message):
+    syslog.syslog(message)
+    print (message)
 
 def check_rtpm_enabled():
-	if not os.path.exists("/sys/devices/system/ibm_rtl/state"):
-		return False
+    if not os.path.exists("/sys/devices/system/ibm_rtl/state"):
+        return False
+
+    # check IBM RTL state is enabled
+    return bool(int(open("/sys/devices/system/ibm_rtl/state").read()))
 
-	#check IBM RTL state is enabled
-	return bool(int(open("/sys/devices/system/ibm_rtl/state").read()))
-	
-#decode each cscrow dir and add all the channels
-def get_rowdata(mcdir):
-	csrowlist = []
-	csrowdirs = sorted(os.listdir(mcdir))
-	
-	for csrow in csrowdirs:
-		if csrow[0:5] == "csrow":
-			dpath=mcdir+"/"+csrow
-			chdir = os.popen("find " + dpath + " | grep ch[0-9]_ce_count").readlines()
-			size_num = int(open(dpath+"/size_mb").read().strip())/len(chdir)
-			for ch in chdir:
-				ce_num = int(open(ch.strip()).read().strip())
-				csrowlist.append(dict(name=csrow, path=ch, ce=ce_num, size=size_num, dimm=-1, count=0, reported=False))
-
-	return csrowlist
-
-#read info on all EDAC channels
-#data returned will not have proper location ("dimm")
-def read_edac():
-	mcpath = "/sys/devices/system/edac/mc"
-	mclist = []
-	edac = []
-	# Check edac dir exists
-	if(not os.path.exists(mcpath)):
-		return mclist
-	# Get list of mc dirs
-	mcdir = sorted(os.listdir(mcpath))
-	for obj in mcdir:
-		if obj[0:2] == "mc":
-			dpath = mcpath+"/"+obj
-			mclist.extend(get_rowdata(dpath))
-	return mclist
-
-#return the total size of memory reported by EDAC
-def get_edac_size(edac):
-	size = 0	
-	for ch in edac:
-		size = size + ch["size"]
-	return size
-
-def mark_dimm_reported(dimm,edac_data):
-	for ch in edac_data:
-		if ch["dimm"] == dimm:
-			ch["reported"] = True
-	return edac_data
-
-#This is the 16 byte value of the SEL entry as defined on page 411 of the IPMI v.2 rev1 spec:
-#Byte Field Description
-#1-2:Record ID
-#3:Record Type
-#4-7:Timestamp
-#8-9:Generator ID
-#10 EvM Rev
-#11 Sensor Type
-#12 Sensor Number
-#13 Event Dir / Event Type
-#14 Event Data 1
-#15 Event Data 2
-#16 Event Data 3 (encode the dimm number here)
-#This emulates what the SMI handler does for correctable ecc errors. 
-def generate_ecc_ipmi_msg(dimm):
-	os.popen("ipmitool raw 0xa 0x44 0x0d 0x00 0x02 0xb4 0x14 0x10 0x47 0x21 0x00 0x04 0x0c 0x00 0x6f 0x35 0xff 0x%x"%(dimm))
-	#toggle led on
-	os.popen("ipmitool raw 0x3A 0x08 0x00 0x%x 0x01" %(0x5f+dimm))
-def check_edac(new_edac):
-	# compare the ce count for each channel in EDAC
-	for ch in edac:
-		val = int(os.popen("cat "+ ch["path"]).read())
-		if val != ch["ce"] and not ch["reported"]: 
-			ch["ce"] = val
-			if ch["count"] > 2:
-				generate_ecc_ipmi_msg(ch["dimm"])
-				new_edac = mark_dimm_reported(ch["dimm"],new_edac)
-			else:
-				ch["count"] = ch["count"] + 1
-		else:
-			ch["count"] = 0 
-	return new_edac
-
-#Read EDAC data and map the real location data from the DMI data 
-#onto the channels.  The return data contains a list of channels
-#with real dimm location
-def setup_edac_data():
-	edac_data=read_edac()
-	dmi_data=read_dmi()	
-	if get_edac_size(edac_data) != get_dmi_size(dmi_data):
-		print_syslog("DMI and EDAC do not agree on the amount of memory.")
-		print_syslog("Proper reporting of ecc errors is not possible.")
-		print_syslog("Please check your system configuration.")
-		sys.exit(-1)
-	
-	if (is_hs21()):
-		return hs21_rework(edac_data)
-	
-	#below is for LS21
-	dmi_size = 0
-	dmi_dimm = -1
-	#loop through all the edac channels and assign dmi dimm info
-	for ch in edac_data:
-		if dmi_size == 0:
-			dmi_dimm = dmi_dimm + 1	
-			dmi_size = get_dmi_dimm_size(dmi_data,dmi_dimm)
-	
-		ch["dimm"] = get_dmi_dimm(dmi_data,dmi_dimm)
-		dmi_size = dmi_size - ch["size"]	
-	
-		#one or more edac channels should == DMI dimm size
-		if dmi_size < 0:
-			print_syslog("ERROR: DMI dimm sizes do not agree with EDAC.")
-			print_syslog("Proper reporting of ecc errors is not possible.")
-			print_syslog("Memory DIMMs with identical size may be required.")
-			print_syslog("Please check your system configuration.")
-			sys.exit(-1);
-
-	return edac_data
-#Do OS level tests. 
-def test_system(do_write):
-	if os.system("which ipmitool 1>/dev/null 2>&1") > 0:
-		print "ipmitool is not installed"
-		do_write = False
-	else:
-		try:
-			if int(os.popen("ipmitool raw 0x3A 0x1A 2>/dev/null").read()) == 0:
-				print "BMC reports that SMIs are off"
-			else:
-				print "BMC reports that SMIs are on"
-	
-		except: 
-			print "ipmitool present but kernel drivers not loaded"
-			do_write = False
-
-	if os.path.exists("/sys/devices/system/ibm_rtl/state"):
-		print "ibm_rtl appears to be loaded"
-
-		if check_rtpm_enabled():
-			print "BIOS reports SMIs are off"
-		else:
-			print "BIOS reports SMIs are on"
-	else:
-		print "ibm_rtl appears to not be loaded"
-
-	
-	edac_data = setup_edac_data()
-
-	print "EDAC information in order of appearance in DMI system " + \
-							"information\n"
-	for ch in edac_data:
-		print "DIMM %(dimm)d includes %(path)s" %ch + \
-			"\tSize is %(size)d MB with %(ce)d ECC errors\n" %ch
-		if do_write:
-			generate_ecc_ipmi_msg(ch["dimm"])
-	if do_write:
-		print "For each Channel above you should see an error " + \
-					"message in your BladeCenter log"
-
-# It all starts here:
-syslog.openlog("IBM ECC Detection Service")
-Test = False
-Write_Test = True  
-for arg in sys.argv:
-	if arg == "--test":
-		Test = True
-	if arg == "nowrite": 
-		Write_Test = False
-
-if Test:
-	print "Testing Started:"
-	test_system(Write_Test)
-	print "Testing Done: Service not started"
-	sys.exit(0)
-
-if not check_rtpm_enabled():
-	syslog.syslog("IBM Premium Real-Time Mode not enabled!")
-	print "IBM Premium Real-Time Mode not enabled!"
-	sys.exit(-1);
-		
-edac = setup_edac_data()
-	
-pid = os.fork()
-if (pid==0):
-	# we are the child
-	while True:
-		edac = check_edac(edac)
-		time.sleep(10)
+class edac_factory(type):
+    subclasses = {}
+    def __new__(cls, name, bases, attrs):
+        # print cls, name, bases, attrs
+        obj = type.__new__(cls, name, bases, attrs)
+        cls.subclasses[name] = obj
+        obj.subclasses = cls.subclasses
+        return obj
+
+class Edac(object):
+    __metaclass__ = edac_factory
+    sysfs_mc_name_path = '/sys/devices/system/edac/mc/mc0/mc_name'
+    # edac_mc_name should be defined in each subclass as the name the
+    # edac driver exports via the mc_name file in sysfs
+    edac_mc_name = 'Unsupported device'
+    edac_data = []
+
+    @staticmethod
+    def create():
+        obj = None
+        for n,s in Edac.subclasses.iteritems():
+            if s.probe():
+                obj = s()
+                break
+        if obj is None:
+            raise InvalidMachineError()
+        return obj
+
+    def __init__(self):
+        self.edac_data = self.read_edac()
+        self.dmi_data = self.read_dmi()
+        if self.get_edac_size() != self.get_dmi_size():
+            print_syslog("DMI and EDAC do not agree on the amount of memory.")
+            print_syslog("Proper reporting of ecc errors is not possible.")
+            print_syslog("Please check your system configuration.")
+            sys.exit(-1)
+
+        self.fixup_tables()
+
+    @staticmethod
+    def probe():
+        return False
+
+    def fixup_tables(self, tables=None):
+        pass  # no-op hook called by __init__ without arguments; subclasses override it
+
+    # Return the list of EDAC channels.  __init__ has already read the
+    # EDAC and DMI data and called fixup_tables(), which the subclasses
+    # use to map the real dimm location onto each channel.
+    def setup_edac_data(self):
+        return self.edac_data
+
+    # Read and decode dmidecode
+    def read_dmi(self):
+        dmi_record = {"dimm": re.compile(r"Locator: (?P<dimm>DIMM\d+)"),
+                  "size": re.compile(r"Size: (?P<size>\d+) MB")}
+
+        dmi_handle = r"Handle [A-F\d]x[A-F\d]{4}\, DMI type \d+\, \d+ bytes\."
+
+        count = 0
+        dmi_list = []
+        for handle in re.split(dmi_handle, os.popen("dmidecode").read()):
+            if "Memory Device\n" in handle[:15]:
+                slot = {"count": count, "enabled": False, "dimm": None, "size": 0}
+                record = handle[15:].split("\n")
+
+                r = dmi_record["size"].match(record[4][1:])
+                if r:
+                    slot["enabled"] = True
+                    slot["size"] = int(r.group(1))
+                    dmi_list.append(slot);
+                    count = count + 1
+
+                r = dmi_record["dimm"].match(record[7][1:])
+                if r:
+                    slot["dimm"] = int(r.group("dimm")[4:])
+        # Reorder DMI entries to csrow order exposed by EDAC.
+        return dmi_list
+
+    # return the dimm number of the n'th (dimm) entry in dmi_data, or -1
+    def get_dmi_dimm(self, dimm):
+        if dimm < 0 or dimm >= len(self.dmi_data):  # index len() is out of range too
+            return -1
+        else:
+            return self.dmi_data[dimm]["dimm"]
+
+    # return the total amount of memory reported by dmi_list
+    def get_dmi_size(self):
+        size = 0
+        for dimm in self.dmi_data:
+            size = size +  dimm["size"]
+        return size
+
+    # return the size in MB of the n'th (dimm) entry in dmi_data, or 0
+    def get_dmi_dimm_size(self, dimm):
+        if dimm < 0 or dimm >= len(self.dmi_data):  # index len() is out of range too
+            return 0
+        else:
+            return self.dmi_data[dimm]["size"]
+
+
+    # decode each csrow dir and add all the channels
+    def get_rowdata(self, mcdir):
+        csrowlist = []
+        csrowdirs = sorted(os.listdir(mcdir))
+
+        for csrow in csrowdirs:
+            if csrow[0:5] == "csrow":
+                dpath=mcdir+"/"+csrow
+                chdir = os.popen("find " + dpath + " -name 'ch[0-9]_ce_count'").readlines()
+                size_num = int(open(dpath+"/size_mb").read().strip())/len(chdir)
+                for ch in chdir:
+                    ce_num = int(open(ch.strip()).read().strip())
+                    csrowlist.append(dict(name=csrow, path=ch, ce=ce_num, size=size_num, dimm=-1, count=0, reported=False))
+
+        return csrowlist
+
+    # read info on all EDAC channels
+    # data returned will not have proper location ("dimm")
+    def read_edac(self):
+        mcpath = "/sys/devices/system/edac/mc"
+        mclist = []
+        edac = []
+        # Check edac dir exists
+        if(not os.path.exists(mcpath)):
+            return mclist
+        # Get list of mc dirs
+        mcdir = sorted(os.listdir(mcpath))
+        for obj in mcdir:
+            if obj[0:2] == "mc":
+                dpath = mcpath+"/"+obj
+                mclist.extend(self.get_rowdata(dpath))
+        return mclist
+
+    # return the total size of memory reported by EDAC
+    def get_edac_size(self):
+        size = 0
+        for ch in self.edac_data:
+            size = size + ch["size"]
+        return size
+
+    def mark_dimm_reported(self, dimm):
+        for ch in self.edac_data:
+            if ch["dimm"] == dimm:
+                ch["reported"] = True
+
+    # This is the 16 byte value of the SEL entry as defined on p.411 of the IPMI v.2 rev1 spec:
+    # Byte Field Description
+    # 1-2:Record ID
+    # 3:Record Type
+    # 4-7:Timestamp
+    # 8-9:Generator ID
+    # 10 EvM Rev
+    # 11 Sensor Type
+    # 12 Sensor Number
+    # 13 Event Dir / Event Type
+    # 14 Event Data 1
+    # 15 Event Data 2
+    # 16 Event Data 3 (encode the dimm number here)
+    # This emulates what the SMI handler does for correctable ecc errors.
+    def generate_ecc_ipmi_msg(self, dimm):
+        os.popen("ipmitool raw 0xa 0x44 0x0d 0x00 0x02 0xb4 0x14 0x10 0x47 " +
+                     "0x21 0x00 0x04 0x0c 0x00 0x6f 0x35 0xff 0x%x"%(dimm))
+        # toggle led on
+        os.popen("ipmitool raw 0x3A 0x08 0x00 0x%x 0x01" %(0x5f+dimm))
+
+    def check_edac(self):
+        # compare the ce count for each channel in EDAC
+        for ch in self.edac_data:
+            val = int(os.popen("cat "+ ch["path"]).read())
+            if val != ch["ce"] and not ch["reported"]:
+                ch["ce"] = val
+                if ch["count"] > 2:
+                    self.generate_ecc_ipmi_msg(ch["dimm"])
+                    self.mark_dimm_reported(ch["dimm"])
+                else:
+                    ch["count"] = ch["count"] + 1
+            else:
+                ch["count"] = 0
+
+    # Do OS level tests.
+    def diagnostics(self, do_write):
+        print "Found a (%s) => %s" % (self.edac_mc_name, type(self))
+        if os.system("which ipmitool 1>/dev/null 2>&1") > 0:
+            print "ipmitool is not installed"
+            do_write = False
+        else:
+            try:
+                if int(os.popen("ipmitool raw 0x3A 0x1A 2>/dev/null").read()) == 0:
+                    print "BMC reports that SMIs are off"
+                else:
+                    print "BMC reports that SMIs are on"
+
+            except:
+                print "ipmitool present but kernel drivers not loaded"
+                do_write = False
+
+        if os.path.exists("/sys/devices/system/ibm_rtl/state"):
+            print "ibm_rtl appears to be loaded"
+
+            if check_rtpm_enabled():
+                print "BIOS reports SMIs are off"
+            else:
+                print "BIOS reports SMIs are on"
+        else:
+            print "ibm_rtl appears to not be loaded"
+
+
+        print "EDAC information in order of appearance in DMI system " + \
+                                "information\n"
+        for ch in self.edac_data:
+            print "DIMM %(dimm)d (%(n)d) includes %(path)s" %ch + \
+                "\tSize is %(size)d MB with %(ce)d ECC errors\n" %ch
+            if do_write:
+                self.generate_ecc_ipmi_msg(ch["dimm"])
+        if do_write:
+            print "For each Channel above you should see an error " + \
+                        "message in your BladeCenter log"
+
+class amd_edac(Edac):
+    def fixup_tables(self, order=None):
+        # amd generic stuff
+        dmi_size = 0
+        dmi_dimm = -1
+        if order != None:
+            new_data = []
+            for dimm in order:
+                for slot in self.dmi_data:
+                    if slot["dimm"] == dimm:
+                        new_data.append(slot);
+            self.dmi_data = new_data
+
+        # loop through all the edac channels and assign dmi dimm info
+        n = 0
+        for ch in self.edac_data:
+            if dmi_size == 0:
+                dmi_dimm = dmi_dimm + 1
+                dmi_size = self.get_dmi_dimm_size(dmi_dimm)
+                ch_per_dimm = dmi_size / ch["size"]
+
+            n = n + 1
+            if dmi_dimm & 0x01:
+                offset = -(n % ch_per_dimm)
+            else:
+                offset = (n+1) % ch_per_dimm
+            ch["dimm"] = self.get_dmi_dimm(dmi_dimm + offset)
+            ch["n"] = n
+            dmi_size = dmi_size - ch["size"]
+
+        # one or more edac channels should == DMI dimm size
+        if dmi_size != 0:
+            print_syslog("ERROR: DMI dimm sizes do not agree with EDAC.")
+            print_syslog("Proper reporting of ecc errors is not possible.")
+            print_syslog("Memory DIMMs with identical size may be required.")
+            print_syslog("Please check your system configuration.")
+            sys.exit(-1);
+
+class ls21_edac(amd_edac):
+    edac_mc_name = 'Athlon64/Opteron/Rev F'
+    def __init__(self):
+        super(ls21_edac, self).__init__()
+
+    @staticmethod
+    def probe():
+        exec_str = "grep '%s' %s" % (
+            ls21_edac.edac_mc_name, Edac.sysfs_mc_name_path
+        )
+        return os.popen(exec_str).readlines() != []
+
+    def fixup_tables(self):
+        order = None
+        super(ls21_edac, self).fixup_tables(order);
+        for ch in self.edac_data:
+            if ch["dimm"] % 2 == 0:
+                ch["dimm"] -= 1
+            else:
+                ch["dimm"] += 1
+
+class ls22_edac(amd_edac):
+    edac_mc_name = 'Family-F10h-Quad-Core'
+    def __init__(self):
+        super(ls22_edac, self).__init__()
+
+    @staticmethod
+    def probe():
+        exec_str = "grep '%s' %s" % (
+            ls22_edac.edac_mc_name, Edac.sysfs_mc_name_path
+        )
+        return os.popen(exec_str).readlines() != []
+
+    def fixup_tables(self):
+        order = 2,4,1,3,6,8,5,7
+        super(ls22_edac, self).fixup_tables(order);
+
+class hs21_edac(Edac):
+    edac_mc_name = 'I5000'
+    def __init__(self):
+        super(hs21_edac, self).__init__()
+
+    @staticmethod
+    def probe():
+        # check for 5000P memory controller and HS21 in dmidecode
+        exec_str = "grep '%s' %s" % (
+            hs21_edac.edac_mc_name, Edac.sysfs_mc_name_path
+        )
+        if os.popen(exec_str).readlines() != [] and \
+                os.popen("dmidecode | grep HS21").readlines() != []:
+            return True
+        return False
+
+    def fixup_tables(self):
+        # With the channels sorted, the topology always looks like this
+        order = 4,2,5,7,4,2,5,7,3,1,6,8,3,1,6,8
+        self.edac_data.sort()
+        for n, ch in enumerate(self.edac_data):
+            ch["dimm"] = order[n]
+            # diagnostics() prints ch["n"]; amd_edac numbers channels from 1
+            ch["n"] = n + 1
+
+def usage(name, exitval=0):
+    print "Usage: %s [options]" % name
+    print " --test          run tests, but don't start daemon"
+    print " --nowrite       don't report test findings to BMC"
+    print " --pidfile f     write pid of daemon to file f (/var/run/%s.pid)" % name
+    print " --foreground    don't daemonize, run in foreground"
+    sys.exit(exitval)
+
+def main(args):
+    name = args[0].split('/')[-1]
+    Test = False
+    Write_Test = True
+    foreground = False
+    pidfile = "/var/run/ibm-prtmd.pid"
+    i = 1
+    while i < len(args):
+        arg = args[i]
+        if arg == "--test":
+            Test = True
+        elif arg == "--nowrite":
+            Write_Test = False
+        elif arg == "--pidfile":
+            i += 1
+            pidfile = args[i]
+        elif arg == "--foreground":
+            foreground = True
+        else:
+            ret = 0
+            if arg not in ["--help", "-h"]:
+                print "unknown argument: %s" % args[i]
+                ret = 1
+            usage(name, ret)
+        i += 1
+
+    syslog.openlog("IBM ECC Detection Service")
+    if Test:
+        print "Testing Started:"
+        try:
+            edac = Edac.create()
+        except InvalidMachineError:
+            print "No valid EDAC class found for this machine"
+            sys.exit(1)
+        edac.diagnostics(Write_Test)
+        print "Testing Done: Service not started"
+        sys.exit(0)
+
+    if not check_rtpm_enabled():
+        syslog.syslog("IBM Premium Real-Time Mode not enabled!")
+        print "IBM Premium Real-Time Mode not enabled!"
+        sys.exit(-1);
+
+    try:
+        edac = Edac.create()
+    except InvalidMachineError:
+        print "No valid EDAC class found for this machine"
+        sys.exit(1)
+
+    if not foreground:
+        daemonize()
+
+    # write pid to pidfile
+    open(pidfile, 'w+').write("%d\n" % os.getpid())
+
+    while True:
+        edac.check_edac()
+        sleep(10)
 
+if __name__ == '__main__':
+    main(sys.argv)