#!/usr/bin/perl -w
#******************************************************************************
#  Copyright (c) 2013 Mauro Carvalho Chehab <mchehab+redhat@kernel.org>
#
#  This tool is a modification of the edac-ctl, written as part of the
#  edac-utils:
#  Copyright (C) 2003-2006 The Regents of the University of California.
#  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
#  Written by Mark Grondona <mgrondona@llnl.gov>
#  UCRL-CODE-230739.
#
#  This version uses the new EDAC v 3.0.0 and upper API, which adds proper
#  representation for the memory controllers found on Intel designs after
#  2002. It requires Linux Kernel 3.5 or upper to work.
#
#  This is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This is distributed in the hope that it will be useful, but WITHOUT
#  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
#  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
#  for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#****************************************************************************/

use strict;
use File::Basename;
use File::Find;
use Getopt::Long;
use POSIX;

# SQLite database queried by the --summary and --errors options.
my $dbname = "/var/lib/rasdaemon/ras-mc_event.db";
my $prefix = "/usr";
my $sysconfdir = "/etc";
my $dmidecode = find_prog ("dmidecode");
my $modprobe = find_prog ("modprobe") or exit (1);

# Event tables that summary() and errors() query in addition to mc_event.
my $has_aer = 0;
my $has_arm = 0;
my $has_extlog = 0;
my $has_mem_failure = 0;
my $has_mce = 0;

$has_aer = 1;
#$has_arm = 1;
$has_extlog = 1;
$has_mem_failure = 1;
$has_mce = 1;

# State shared between the subs below.  The %dimm_* and %csrow_* hashes
# are keyed by a ':'-joined position string ("mc:layer0:layer1...") and
# are filled by parse_dimm_nodes() while walking the EDAC sysfs tree.
my %conf = ();
my %bus = ();
my %dimm_size = ();
my %dimm_node = ();
my %dimm_label_file = ();
my %dimm_location = ();
my %csrow_size = ();
my %rank_size = ();
my %csrow_ranks = ();
my %dimm_ce_count = ();
my %dimm_ue_count = ();

# @layers holds the layer names ("mc" first); @max_pos the highest index
# seen for each of those layers.
my @layers;
my @max_pos;
my @max_csrow;
my $item_size;

my $prog = basename $0;
$conf{labeldb} = "$sysconfdir/ras/dimm_labels.db";
$conf{labeldir} = "$sysconfdir/ras/dimm_labels.d";
$conf{mbconfig} = "$sysconfdir/ras/mainboard";

my $status = 0;

my $usage = <<EOF;
Usage: $prog [OPTIONS...]
 --quiet            Quiet operation.
 --mainboard        Print mainboard vendor and model for this hardware.
 --status           Print status of EDAC drivers.
 --print-labels     Print Motherboard DIMM labels to stdout.
 --guess-labels     Print DMI labels, when bank locator is available.
 --register-labels  Load Motherboard DIMM labels into EDAC driver.
 --delay=N          Delay N seconds before writing DIMM labels.
 --labeldb=DB       Load label database from file DB.
 --layout           Display the memory layout.
 --summary          Presents a summary of the logged errors.
 --errors           Shows the errors stored at the error database.
 --error-count      Shows the corrected and uncorrected error counts using sysfs.
 --help             This help message.
EOF

parse_cmdline();

# Every option in this group needs the mainboard vendor/model first.
if (   $conf{opt}{mainboard} || $conf{opt}{print_labels}
    || $conf{opt}{register_labels} || $conf{opt}{display_memory_layout}
    || $conf{opt}{guess_dimm_label} || $conf{opt}{error_count}) {

    get_mainboard_info();

    if ($conf{opt}{mainboard} eq "report") {
        print "$prog: mainboard: ",
              "$conf{mainboard}{vendor} model $conf{mainboard}{model}\n";
    }

    if ($conf{opt}{print_labels}) {
        print_dimm_labels ();
    }
    if ($conf{opt}{register_labels}) {
        register_dimm_labels ();
    }
    if ($conf{opt}{display_memory_layout}) {
        display_memory_layout ();
    }
    if ($conf{opt}{guess_dimm_label}) {
        guess_dimm_label ();
    }
    if ($conf{opt}{error_count}) {
        display_error_count ();
    }
}

if ($conf{opt}{status}) {
    $status = print_status ();
    exit ($status ? 0 : 1);
}

if ($conf{opt}{summary}) {
    summary ();
}

if ($conf{opt}{errors}) {
    errors ();
}

exit (0);

#
# Fill $conf{opt} (and $conf{labeldb}) from the command line.
# Prints the usage text and exits when the options are invalid or when
# no option at all was selected; exits with 1 when --delay is used
# without --register-labels.
#
sub parse_cmdline
{
    $conf{opt}{mainboard} = '';
    $conf{opt}{print_labels} = 0;
    $conf{opt}{register_labels} = 0;
    $conf{opt}{status} = 0;
    $conf{opt}{quiet} = 0;
    $conf{opt}{delay} = 0;
    $conf{opt}{display_memory_layout} = 0;
    $conf{opt}{guess_dimm_label} = 0;
    $conf{opt}{summary} = 0;
    $conf{opt}{errors} = 0;
    $conf{opt}{error_count} = 0;

    Getopt::Long::Configure ("bundling");
    my $rc = GetOptions (
        # --mainboard without a value means "just report it".
        "mainboard:s"     => sub { $conf{opt}{mainboard} = $_[1] || "report" },
        "help"            => sub { usage (0) },
        "quiet"           => \$conf{opt}{quiet},
        "print-labels"    => \$conf{opt}{print_labels},
        "guess-labels"    => \$conf{opt}{guess_dimm_label},
        "register-labels" => \$conf{opt}{register_labels},
        "delay:s"         => \$conf{opt}{delay},
        "labeldb=s"       => \$conf{labeldb},
        "status"          => \$conf{opt}{status},
        "layout"          => \$conf{opt}{display_memory_layout},
        "summary"         => \$conf{opt}{summary},
        "errors"          => \$conf{opt}{errors},
        "error-count"     => \$conf{opt}{error_count}
    );

    usage (1) if !$rc;

    # Nothing requested at all: show the help text.
    usage (0) if !grep $conf{opt}{$_}, keys %{$conf{opt}};

    if ($conf{opt}{delay} && !$conf{opt}{register_labels}) {
        log_error ("Only use --delay with --register-labels\n");
        exit (1);
    }
}

#
# Print the usage text and exit with status $rc.
#
sub usage
{
    my ($rc) = @_;

    print "$usage\n";
    exit ($rc);
}

#
# Run a command and return its exit status.
# NOTE(review): the arguments are joined with blanks and handed to the
# shell as a single string, so callers must not pass untrusted data.
#
sub run_cmd
{
    my @args = @_;

    system ("@args");
    return ($?>>8);
}

#
# Report whether any module whose name contains "_edac" is listed in
# /proc/modules.  Returns 1 when one is found, 0 otherwise.
#
sub print_status
{
    my $status = 0;

    open (my $modules, '<', "/proc/modules")
        or die "Unable to open /proc/modules: $!\n";

    while (<$modules>) {
        $status = 1 if /_edac/;
    }
    close ($modules);

    print "$prog: drivers ", ($status ? "are" : "not"), " loaded.\n"
        unless $conf{opt}{quiet};

    return ($status);
}

#
# Return the first line of $file without its trailing newline, or undef
# when the file cannot be opened or is empty.
#
sub read_first_line
{
    my ($file) = @_;

    return undef unless defined $file;
    open (my $fh, '<', $file) or return undef;
    my $line = <$fh>;
    close ($fh);
    chomp ($line) if defined $line;
    return $line;
}

#
# File::Find callback (used with no_chdir => 1) that records what is
# found under the EDAC sysfs tree:
#  - */max_location  -> layer names (@layers) and their maxima (@max_pos)
#  - */size_mb       -> %csrow_size, keyed by "mc:csrow"
#  - */*location     -> per-DIMM size, label file, location and CE/UE
#                       counters, keyed by "mc:pos0:pos1..."
# Exits with -1 when the per-DIMM error counters are missing.
#
sub parse_dimm_nodes
{
    my $file = $File::Find::name;

    if ($file =~ /max_location$/) {
        my $location = read_first_line ($file);
        return unless defined $location;
        $location =~ s/\s+$//;

        # The file holds "<layer> <max> <layer> <max> ..." pairs.
        my @temp = split(/ /, $location);

        $layers[0] = "mc";
        if ($file =~ m,/mc/mc(\d+),) {
            $max_pos[0] = $1 if (!defined($max_pos[0]) || $1 > $max_pos[0]);
        } else {
            $max_pos[0] = 0 if (!defined($max_pos[0]));
        }
        for (my $i = 0; $i < scalar(@temp); $i += 2) {
            $layers[$i / 2 + 1] = $temp[$i];
            $max_pos[$i / 2 + 1] = $temp[$i + 1];
        }
        return;
    }

    if ($file =~ /size_mb$/) {
        my $mc = $file;
        $mc =~ s,.*mc(\d+).*,$1,;

        my $csrow = $file;
        $csrow =~ s,.*csrow(\d+).*,$1,;

        $csrow_size{join(':', $mc, $csrow)} = read_first_line ($file);
        return;
    }

    if ($file =~ /location$/) {
        my $mc = $file;
        $mc =~ s,.*mc(\d+).*,$1,;

        my $dimm = $file;
        $dimm =~ s,.*dimm(\d+).*,$1,;

        my $location = read_first_line ($file);
        return unless defined $location;
        $location =~ s/\s+$//;

        my @temp = split(/ /, $location);
        my @pos;

        # Get the name of the hierarchy labels
        if (!@layers) {
            $max_pos[0] = 0;
            $layers[0] = "mc";
            for (my $i = 0; $i < scalar(@temp); $i += 2) {
                $layers[$i / 2 + 1] = $temp[$i];
                $max_pos[$i / 2 + 1] = 0;
            }
        }

        # Odd fields are the positions.  Position N belongs to layer N + 1
        # because slot 0 of @layers/@max_pos is the memory controller, so
        # the maximum has to be compared and stored at the same slot.
        for (my $i = 1; $i < scalar(@temp); $i += 2) {
            my $layer = ($i - 1) / 2;

            $pos[$layer] = $temp[$i];
            if (!defined($max_pos[$layer + 1])
                || $pos[$layer] > $max_pos[$layer + 1]) {
                $max_pos[$layer + 1] = $pos[$layer];
            }
        }
        if ($mc > $max_pos[0]) {
            $max_pos[0] = $mc;
        }

        # The remaining per-DIMM attributes sit next to the location file.
        my $dir = dirname ($file);
        my $str_loc = join(':', $mc, @pos);

        $dimm_size{$str_loc} = read_first_line ("$dir/size");
        $dimm_node{$str_loc} = $dimm;
        $dimm_label_file{$str_loc} = "$dir/dimm_label";
        $dimm_location{$str_loc} = $location;

        if (!-e "$dir/dimm_ce_count") {
            log_error ("dimm_ce_count not found in sysfs. Old kernel?\n");
            exit -1;
        }
        $dimm_ce_count{$str_loc} = read_first_line ("$dir/dimm_ce_count");

        if (!-e "$dir/dimm_ue_count") {
            log_error ("dimm_ue_count not found in sysfs. Old kernel?\n");
            exit -1;
        }
        $dimm_ue_count{$str_loc} = read_first_line ("$dir/dimm_ue_count");

        return;
    }
}

#
# Return (product_vendor, product_name) as read from the DMI sysfs
# files; either value is undef when its file cannot be read.
#
sub guess_product
{
    my $pvendor = read_first_line ("/sys/class/dmi/id/product_vendor");
    my $pname = read_first_line ("/sys/class/dmi/id/product_name");

    return ($pvendor, $pname);
}

#
# Fill $conf{mainboard}{vendor,model,product_vendor,product_name}.
# A "vendor:model" (or "vendor model") value given to --mainboard wins;
# otherwise the values are guessed by guess_vendor_model().
#
sub get_mainboard_info
{
    my ($vendor, $model);
    my ($pvendor, $pname);

    if ($conf{opt}{mainboard} && $conf{opt}{mainboard} ne "report") {
        ($vendor, $model) = split (/[: ]/, $conf{opt}{mainboard}, 2);
    }

    if (!$vendor || !$model) {
        ($vendor, $model) = guess_vendor_model ();
    }

    $conf{mainboard}{vendor} = $vendor;
    $conf{mainboard}{model} = $model;

    ($pvendor, $pname) = guess_product ();

    # since product vendor is rare, use mainboard's vendor
    if ($pvendor) {
        $conf{mainboard}{product_vendor} = $pvendor;
    } else {
        $conf{mainboard}{product_vendor} = $vendor;
    }
    $conf{mainboard}{product_name} = $pname if $pname;
}

#
# Guess (vendor, model) from the dmidecode output.  Board values are
# preferred; the system values are used for whatever the board section
# did not provide.  Dies unless run as root or when dmidecode cannot
# be started.
#
sub guess_vendor_model_dmidecode
{
    my ($vendor, $model);
    my ($system_vendor, $system_model);

    $< == 0 || die "Must be root to run dmidecode\n";
    $dmidecode or die "dmidecode not found\n";

    open (my $dmi, '-|', $dmidecode)
        or die "failed to run $dmidecode: $!\n";

    $vendor = $model = "";

    LINE:
    while (<$dmi>) {
        /^(\s*)(board|base board|system) information/i || next LINE;
        my $indent = $1;
        # The match above ignores case, so normalise before comparing.
        my $type = lc $2;

        while (<$dmi>) {
            /^(\s*)/;
            # A shallower indentation ends the section.
            $1 lt $indent && last LINE;
            $indent = $1;
            if ($type eq "system") {
                /(?:manufacturer|vendor):\s*(.*\S)\s*/i && ( $system_vendor = $1 );
                /product(?: name)?:\s*(.*\S)\s*/i && ( $system_model = $1 );
            } else {
                /(?:manufacturer|vendor):\s*(.*\S)\s*/i && ( $vendor = $1 );
                /product(?: name)?:\s*(.*\S)\s*/i && ( $model = $1 );
            }
            last LINE if ($vendor && $model);
        }
    }

    close ($dmi);

    $vendor = $system_vendor if ($vendor eq "");
    $model = $system_model if ($model eq "");

    return ($vendor, $model);
}

#
# Try to look up the board vendor and model in sysfs.
# Returns (vendor, model), or nothing unless both values are non-empty.
#
sub guess_vendor_model_sysfs
{
    my $vendor = read_first_line ("/sys/class/dmi/id/board_vendor");
    my $model = read_first_line ("/sys/class/dmi/id/board_name");

    return unless ($vendor && $model);
    return ($vendor, $model);
}

#
# Parse a "key = value" file and return a reference to a hash of
# { key => { val => ..., line => ... } }.  A $file that ends in '|' is
# run as a command and its output is parsed instead.
# Returns undef as soon as a line does not look like "key = value";
# dies when the file or command cannot be opened.
#
sub parse_mainboard_config
{
    my ($file) = @_;
    my %hash = ();
    my $line = 0;
    my $cfg;

    if ($file =~ /^\s*(.*\S)\s*\|\s*$/) {
        open ($cfg, '-|', $1)
            or die "Failed to read mainboard config: $file: $!\n";
    } else {
        open ($cfg, '<', $file)
            or die "Failed to read mainboard config: $file: $!\n";
    }

    while (<$cfg>) {
        $line++;

        chomp;                                           # remove newline
        s/^((?:[^'"#]*(?:(['"])[^\2]*\2)*)*)#.*/$1/;     # remove comments
        s/^\s+//;                                        # remove leading space
        s/\s+$//;                                        # remove trailing space
        next unless length;                              # skip blank lines

        if (my ($key, $val) = /^\s*([-\w]+)\s*=\s*(.*)/) {
            $hash{$key}{val} = $val;
            $hash{$key}{line} = $line;
            next;
        }

        # Anything else is a syntax error: give up on the whole file.
        close ($cfg);
        return undef;
    }

    close ($cfg) or log_error ("close $file: $!\n");

    return \%hash;
}

#
# Return the mainboard (vendor, model).  The mainboard config file is
# authoritative when it exists; otherwise sysfs is tried first and
# dmidecode last.
#
sub guess_vendor_model
{
    my ($vendor, $model);

    #
    # If mainboard config file exists then parse it
    # to get the vendor and model information.
    #
    if (-f $conf{mbconfig} ) {
        my $cfg = parse_mainboard_config ($conf{mbconfig});

        # If mainboard config file specified a script, then try to
        # run the specified script or executable:
        #
        if ($cfg->{"script"}) {
            $cfg = parse_mainboard_config ("$cfg->{script}{val} |");
            die "Failed to run mainboard script\n" if (!$cfg);
        }
        return ($cfg->{vendor}{val}, $cfg->{model}{val});
    }

    ($vendor, $model) = guess_vendor_model_sysfs ();

    return ($vendor, $model) if ($vendor && $model);

    return (guess_vendor_model_dmidecode ());
}

#
# Print, for every "Memory Device" section of the dmidecode output that
# has both a Locator and a Bank Locator, where that memory stick sits.
#
sub guess_dimm_label
{
    $dmidecode or die "dmidecode not found\n";

    open (my $dmi, '-|', $dmidecode)
        or die "failed to run $dmidecode: $!\n";

    LINE:
    while (<$dmi>) {
        /^(\s*)memory device$/i || next LINE;

        my ($dimm_label, $dimm_addr);

        while (<$dmi>) {
            if (/^\s*(locator|bank locator)/i) {
                my $field = lc $1;

                if ($field eq "locator") {
                    /(?:locator):\s*(.*\S)\s*/i && ( $dimm_label = $1 );
                }
                if ($field eq "bank locator") {
                    /(?:bank locator):\s*(.*\S)\s*/i && ( $dimm_addr = $1 );
                }
            }
            if ($dimm_label && $dimm_addr) {
                printf "memory stick '%s' is located at '%s'\n",
                       $dimm_label, $dimm_addr;
                next LINE;
            }
            # A blank line ends the section.
            next LINE if (/^\s*\n/);
        }
    }

    close ($dmi);
}

#
# Parse one DIMM label database file.
#   $lh, $num_layers            hash refs filled for "model:"/"board:"
#                               entries, keyed by {vendor}{model}
#   $lh_prod, $num_layers_prod  the same for "product:" entries
#   $file                       the file to read
# Labels end up in {vendor}{name}{mc}{top}{mid}{low}; the number of
# layers in use (1 to 3) is stored per {vendor}{name}.
# Dies on unreadable files and on inconsistent or unsupported entries.
#
sub parse_dimm_labels_file
{
    my ($lh, $num_layers, $lh_prod, $num_layers_prod, $file) = (@_);
    my $line = 0;
    my $vendor = "";
    my @models = ();
    my @products = ();
    my $num;

    # Strip the blanks around every name of a comma/semicolon list.
    my $split_names = sub {
        my @names = split(/[,;]+/, $_[0]);

        for my $name (@names) {
            $name =~ s/^\s+//;
            $name =~ s/\s+$//;
        }
        return @names;
    };

    open (my $labels, '<', $file)
        or die "Unable to open label database: $file: $!\n";

    while (<$labels>) {
        $line++;

        next if /^#/;
        chomp;
        s/^\s+//;
        s/\s+$//;
        next unless length;

        if (/vendor\s*:\s*(.*\S)\s*/i) {
            $vendor = lc $1;
            @models = ();
            @products = ();
            $num = 0;
            next;
        }
        if (/(model|board)\s*:\s*(.*)$/i) {
            !$vendor && die "$file: line $line: MB model without vendor\n";
            @models = $split_names->($2);
            @products = ();
            $num = 0;
            next;
        }
        if (/(product)\s*:\s*(.*)$/i) {
            !$vendor && die "$file: line $line: product without vendor\n";
            @models = ();
            @products = $split_names->($2);
            $num = 0;
            next;
        }

        # Allow multiple labels to be specified on a single line,
        # separated by ;
        for my $str (split /;/) {
            $str =~ s/^\s+//;
            $str =~ s/\s+$//;

            my ($label, $info) = ($str =~ /^(.*)\s*:\s*(.*)$/);
            next unless defined $info;

            # The greedy label capture keeps blanks before the ':'.
            $label =~ s/\s+$//;

            unless ($info =~ /\d+(?:[\.\:]\d+)*/) {
                log_error ("$file: $line: Invalid syntax, ignoring: \"$_\"\n");
                next;
            }

            for my $target (split (/[, ]+/, $info)) {
                my ($mc, $top, $mid, $low, $extra) = ($target =~ /(\d+)(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}(?:[\.\:](\d+)){0,1}/);

                if (defined($extra)) {
                    die ("Error: Only up to 3 layers are currently supported on label db \"$file\"\n");
                }
                if (!defined($top)) {
                    die ("Error: The label db \"$file\" is defining a zero-layers machine\n");
                }

                # Missing lower layers count as position 0.
                my $n = 3;
                if (!defined($low)) {
                    $low = 0;
                    $n--;
                }
                if (!defined($mid)) {
                    $mid = 0;
                    $n--;
                }
                for my $model (@models) {
                    $lh->{$vendor}{lc $model}{$mc}{$top}{$mid}{$low} = $label;
                }
                for my $product (@products) {
                    $lh_prod->{$vendor}{lc $product}{$mc}{$top}{$mid}{$low} = $label;
                }

                if (!$num) {
                    $num = $n;
                    for my $model (@models) {
                        $num_layers->{$vendor}{lc $model} = $num;
                    }
                    for my $product (@products) {
                        $num_layers_prod->{$vendor}{lc $product} = $num;
                    }
                } elsif ($num != $n) {
                    die ("Error: Inconsistent number of layers at label db \"$file\"\n");
                }
            }
        }
    }

    close ($labels) or die "Error from label db \"$file\" : $!\n";
}

#
# Accrue all DIMM labels from the labels.db file, as well as any files
# under the labels dir.  Returns references to the four hashes filled
# by parse_dimm_labels_file(): labels and layer counts by board, then
# labels and layer counts by product.
#
sub parse_dimm_labels
{
    my %labels = ();
    my %num_layers = ();
    my %labels_prod = ();
    my %num_layers_prod = ();

    for my $file ($conf{labeldb}, glob ("$conf{labeldir}/*")) {
        next unless -r $file;
        parse_dimm_labels_file (\%labels, \%num_layers,
                                \%labels_prod, \%num_layers_prod, $file);
    }

    return (\%labels, \%num_layers, \%labels_prod, \%num_layers_prod);
}

#
# Read the label currently stored in sysfs for the DIMM at the given
# position.  Always returns a (label, location) pair; both carry a
# "missing"/"Error" text when the DIMM or its label file is unavailable.
#
sub read_dimm_label
{
    my ($num_layers, $mc, $top, $mid, $low) = @_;
    my $pos;

    $pos = "$mc:$top:$mid:$low" if ($num_layers == 3);
    $pos = "$mc:$top:$mid" if ($num_layers == 2);
    $pos = "$mc:$top" if ($num_layers == 1);

    if (!defined($dimm_node{$pos})) {
        return ("$pos missing", "");
    }

    my $dimm_label_file = $dimm_label_file{$pos};
    my $location = $dimm_location{$pos};

    return ("label missing", "$pos missing") unless -f $dimm_label_file;

    my $label_fh;
    if (!open ($label_fh, '<', $dimm_label_file)) {
        warn "Failed to open $dimm_label_file: $!\n";
        # Callers unpack two values, so keep the position on errors too.
        return ("Error", $pos);
    }

    chomp (my $label = <$label_fh> || "");
    close ($label_fh);

    return ($label, "mc$mc $location");
}

#
# Return the sysfs label file of the DIMM at the given position, or ""
# when no such DIMM was found.
#
sub get_dimm_label_node
{
    my ($num_layers, $mc, $top, $mid, $low) = @_;
    my $pos = "$mc:$top:$mid:$low";

    $pos = "$mc:$top:$mid" if ($num_layers == 2);
    $pos = "$mc:$top" if ($num_layers == 1);

    return "" if (!defined($dimm_node{$pos}));

    return "$dimm_label_file{$pos}";
}

#
# Print one row per configured label of $vendor/$model: the location,
# the configured label and the label currently found in sysfs.
#
sub _print_dimm_labels
{
    my ($lref, $num_layers, $vendor, $model, $fh, $format) = @_;
    my $board = $$lref{$vendor}{$model};

    # The keys are numbers, so sort them numerically (2 before 10).
    for my $mc (sort { $a <=> $b } keys %{$board}) {
        for my $top (sort { $a <=> $b } keys %{$$board{$mc}}) {
            for my $mid (sort { $a <=> $b } keys %{$$board{$mc}{$top}}) {
                for my $low (sort { $a <=> $b } keys %{$$board{$mc}{$top}{$mid}}) {
                    my $label = $$board{$mc}{$top}{$mid}{$low};
                    my ($rlabel, $loc) = read_dimm_label ($$num_layers{$vendor}{$model}, $mc, $top, $mid, $low);

                    printf $fh $format, $loc, $label, $rlabel;
                }
            }
        }
    }
    print $fh "\n";
}

#
# Print the labels configured for this mainboard (or, failing that, for
# this product) next to the labels currently registered in sysfs.
# The optional argument is the file handle to print to (default STDOUT).
#
sub print_dimm_labels
{
    my $fh = shift || *STDOUT;
    my ($lref, $num_layers, $lref_prod, $num_layers_prod) = parse_dimm_labels ();
    my $vendor = lc $conf{mainboard}{vendor};
    my $model = lc $conf{mainboard}{model};
    my $pvendor = lc $conf{mainboard}{product_vendor};
    # The product name is only set when sysfs provided one.
    my $pname = defined $conf{mainboard}{product_name}
                ? lc $conf{mainboard}{product_name} : "";
    my $format = "%-35s %-20s %-20s\n";

    if (!exists $$lref{$vendor}{$model} && !exists $$lref_prod{$pvendor}{$pname}) {
        log_error ("No dimm labels for $conf{mainboard}{vendor} " .
                   "model $conf{mainboard}{model}\n");
        return;
    }

    my $sysfs_dir = "/sys/devices/system/edac/mc";

    find({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir);

    printf $fh $format, "LOCATION", "CONFIGURED LABEL", "SYSFS CONTENTS";

    if (exists $$lref{$vendor}{$model}) {
        _print_dimm_labels($lref, $num_layers, $vendor, $model, $fh, $format);
    } elsif (exists $$lref_prod{$pvendor}{$pname}) {
        _print_dimm_labels($lref_prod, $num_layers_prod, $pvendor, $pname, $fh, $format);
    }
}

#
# Write every label configured for $vendor/$model into the matching
# sysfs label file.  Positions without a sysfs file are skipped.
#
sub write_dimm_labels
{
    my ($lref, $num_layers, $vendor, $model) = @_;
    my $board = $$lref{$vendor}{$model};

    for my $mc (sort keys %{$board}) {
        for my $top (sort keys %{$$board{$mc}}) {
            for my $mid (sort keys %{$$board{$mc}{$top}}) {
                for my $low (sort keys %{$$board{$mc}{$top}{$mid}}) {
                    my $file = get_dimm_label_node($$num_layers{$vendor}{$model}, $mc, $top, $mid, $low);

                    # Ignore sysfs files that don't exist. Might just be
                    # unpopulated bank.
                    next unless -f $file;

                    my $label_fh;
                    if (!open ($label_fh, '>', $file)) {
                        warn ("Unable to open $file\n");
                        next;
                    }

                    defined (syswrite $label_fh, $$board{$mc}{$top}{$mid}{$low})
                        or warn ("Unable to write $file: $!\n");

                    close ($label_fh);
                }
            }
        }
    }
}

#
# Load the labels configured for this mainboard (or product) into the
# EDAC driver, after waiting --delay seconds.
# Returns 1 on success and 0 when no labels are known for this hardware.
#
sub register_dimm_labels
{
    my ($lref, $num_layers, $lref_prod, $num_layers_prod) = parse_dimm_labels ();
    my $vendor = lc $conf{mainboard}{vendor};
    my $model = lc $conf{mainboard}{model};
    my $pvendor = lc $conf{mainboard}{product_vendor};
    # The product name is only set when sysfs provided one.
    my $pname = defined $conf{mainboard}{product_name}
                ? lc $conf{mainboard}{product_name} : "";

    if (!exists $$lref{$vendor}{$model} && !exists $$lref_prod{$pvendor}{$pname}) {
        log_error ("No dimm labels for $conf{mainboard}{vendor} " .
                   "model $conf{mainboard}{model}\n");
        return 0;
    }

    my $sysfs_dir = "/sys/devices/system/edac/mc";

    find({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir);

    # 4-argument select() is used as a sleep that accepts fractions.
    select (undef, undef, undef, $conf{opt}{delay});

    if (exists $$lref{$vendor}{$model}) {
        write_dimm_labels($lref, $num_layers, $vendor, $model);
    } else {
        write_dimm_labels($lref_prod, $num_layers_prod, $pvendor, $pname);
    }

    return 1;
}

sub dimm_display_layer_rev($@);

#
# Build the size cells of one output row for every layer after $layer.
# The last element of @pos stays fixed; the layers between $layer and
# that last one are iterated over their whole range.  Also records the
# width of one cell in $item_size.
#
sub dimm_display_layer_rev($@)
{
    my $layer = shift;
    my @pos = @_;

    $layer++;
    if ($layer >= scalar(@pos) - 1) {
        my $str_loc = join(':', @pos);
        my $size = $dimm_size{$str_loc};

        if (!$size) {
            $size = 0;
        }
        my $s = sprintf " %4i MB |", $size;
        $item_size = length($s);
        return $s;
    }

    my $s = "";
    for (my $i = 0; $i <= $max_pos[$layer]; $i++) {
        $pos[$layer] = $i;
        $s .= dimm_display_layer_rev($layer, @pos);
    }

    return $s;
}

#
# Build all size cells of one output row, for every memory controller.
#
sub dimm_display_layer(@)
{
    my @pos = @_;
    my $s = "";

    for (my $i = 0; $i <= $max_pos[0]; $i++) {
        $pos[0] = $i;
        $s .= dimm_display_layer_rev(0, @pos);
    }

    return $s;
}

#
# Build the header line of layer $layer: one centred "<name><index>"
# cell per group, each spanning $tot_items / (groups so far) size cells.
#
sub dimm_display_layer_header($$)
{
    my $n_items = 1;
    my $scale;
    my $layer = shift;
    my $tot_items = shift;
    my $s = "";

    for (my $i = 0; $i <= $layer; $i++) {
        $n_items *= $max_pos[$i] + 1;
    }
    $scale = $tot_items / $n_items;

    my $d = 0;
    for (my $i = 0; $i < $n_items; $i++) {
        my $val = sprintf("%s%d", $layers[$layer], $d);
        $val = substr($val, 0, $scale * $item_size - 2);
        my $fillsize = $scale * $item_size - 1 - length($val);

        $s .= "|";
        $s .= " " x ($fillsize / 2);
        $s .= $val;
        $s .= " " x ($fillsize - floor($fillsize / 2));

        # The index restarts for every group of the parent layer.
        $d++;
        if ($d > $max_pos[$layer]) {
            $d = 0;
        }
    }
    $s .= "|";

    return $s;
}

#
# Print the memory layout as a table: the last layer is drawn as rows
# (highest index first), all other layers as nested columns.
#
sub dimm_display_mem()
{
    my @pos = @max_pos;
    my $sep = "";
    my $tot_items = 1;
    my $first = 1;
    my $last = scalar(@max_pos) - 1;

    for (my $i = 0; $i < scalar(@pos) - 1; $i++) {
        $pos[$i] = 0;
        $tot_items *= $max_pos[$i] + 1;
    }

    # True when the number of rows (highest index + 1) is even; rows are
    # then separated in pairs.
    my $is_even = $max_pos[$last] % 2;

    for (my $d = $max_pos[$last]; $d >= 0; $d--) {
        my $s = sprintf("%s%d: |", $layers[$last], $d);
        my $p1 = length($s) - 1;

        $pos[scalar(@pos) - 1] = $d;
        $s .= dimm_display_layer(@pos);
        my $len = length($s);

        $sep = "-" x $p1;
        $sep .= "+";
        $sep .= "-" x ($len - $p1 - 2);
        $sep .= "+";

        if ($first) {
            my $sep1 = " " x $p1;
            $sep1 .= "+";
            $sep1 .= "-" x ($len - $p1 - 2);
            $sep1 .= "+";
            # print, not printf: the text is data, not a format string.
            print "$sep1\n";
            for (my $layer = 0; $layer < scalar(@pos) - 1; $layer++) {
                my $row_label = sprintf("%s%d: |", $layers[$last], 0);
                my $msg = " " x (length($row_label) - 1);

                $msg .= dimm_display_layer_header($layer, $tot_items);
                print "$msg\n";
            }
            print "$sep\n" if (!$is_even);
            $first = 0;
        }
        if ($is_even && (($max_pos[$last] - $d) % 2 == 0)) {
            print "$sep\n";
        }
        print "$s\n";
    }
    print "$sep\n";
}

#
# Derive the size of every rank listed in %rank_size from the size of
# its csrow divided by the number of ranks of that csrow.
#
sub fill_csrow_size()
{
    foreach my $str_loc (keys %rank_size) {
        my @temp = split(/:/, $str_loc);
        my $csrow = join(':', $temp[0], $temp[1]);

        if ($csrow_ranks{$csrow}) {
            $rank_size{$str_loc} = $csrow_size{$csrow} / $csrow_ranks{$csrow};
        }
    }
}

#
# Display the memory layout found in the EDAC sysfs tree.
# Exits with -1 when no memory was found there.
#
sub display_memory_layout
{
    my $sysfs_dir = "/sys/devices/system/edac/mc";

    find({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir);

    # Only give up when neither per-DIMM nor per-csrow sizes exist; the
    # csrow files are not needed when the per-DIMM ones are present.
    if (!%csrow_size && !%dimm_size) {
        log_error ("No memories found at via edac.\n");
        exit -1;
    }

    if (!%dimm_size) {
        # NOTE(review): nothing visible in this file fills %rank_size,
        # %csrow_ranks or @max_csrow, so this fallback may yield nothing.
        fill_csrow_size();
        $layers[0] = "mc";
        $layers[1] = "csrow";
        $layers[2] = "channel";
        @max_pos = @max_csrow;
        %dimm_size = %rank_size;

        if (!%dimm_size || !@max_pos) {
            log_error ("No memories found at via edac.\n");
            exit -1;
        }
    }

    dimm_display_mem();
}

#
# Print the corrected (CE) and uncorrected (UE) error counters of every
# DIMM found in sysfs, one line per DIMM, sorted by position.
# Exits with -1 when no DIMM was found.
#
sub display_error_count
{
    my $sysfs_dir = "/sys/devices/system/edac/mc";
    my $max_width = 0;
    my %dimm_labels = ();

    find ({wanted => \&parse_dimm_nodes, no_chdir => 1}, $sysfs_dir);

    if (!scalar(keys %dimm_node)) {
        log_error ("No DIMMs found in /sys or new sysfs EDAC interface not found.\n");
        exit -1;
    }

    # Sorted so that the output order is stable between runs.
    my @positions = sort keys %dimm_node;

    foreach my $key (@positions) {
        my $label = read_first_line ($dimm_label_file{$key});

        $label = "" unless defined $label;
        if (length ($label) > $max_width) {
            $max_width = length ($label);
        }
        $dimm_labels{$key} = $label;
    }

    my $string = "Label";
    my $padding = $max_width - length ($string);
    $string .= " " x $padding if ($padding > 0);
    print($string . "\tCE\tUE\n");

    foreach my $key (@positions) {
        my $ce_count = $dimm_ce_count{$key};
        my $ue_count = $dimm_ue_count{$key};

        print("$dimm_labels{$key}\t$ce_count\t$ue_count\n");
    }
}

#
# Return the full path of executable $file, looking in /sbin, /usr/sbin
# and then every directory of $PATH; returns "" when it is not found.
#
sub find_prog
{
    my ($file) = @_;
    # PATH may be unset (e.g. when started from a minimal environment).
    my @path = defined $ENV{PATH} ? split (':', $ENV{PATH}) : ();

    for my $dir ("/sbin", "/usr/sbin", @path) {
        return "$dir/$file" if -x "$dir/$file";
    }
    # log_error ("Failed to find $file in PATH\n");
    return "";
}

#
# Map an extlog error type number (0 to 15) to its description;
# anything else gives "unknown-type".
#
sub get_extlog_type
{
    my @types;

    if ($_[0] < 0 || $_[0] > 15) {
        return "unknown-type";
    }

    @types = ("unknown",
              "no error",
              "single-bit ECC",
              "multi-bit ECC",
              "single-symbol chipkill ECC",
              "multi-symbol chipkill ECC",
              "master abort",
              "target abort",
              "parity error",
              "watchdog timeout",
              "invalid address",
              "mirror Broken",
              "memory sparing",
              "scrub corrected error",
              "scrub uncorrected error",
              "physical memory map-out event",
              "unknown-type");

    return $types[$_[0]];
}

#
# Map an extlog severity number (0 to 3) to its description;
# anything else gives "unknown-severity".
#
sub get_extlog_severity
{
    my @sev;

    if ($_[0] < 0 || $_[0] > 3) {
        return "unknown-severity";
    }

    @sev = ("recoverable",
            "fatal",
            "corrected",
            "informational",
            "unknown-severity");

    return $sev[$_[0]];
}

# Bits of the validation field that tell which of the values unpacked
# by get_cper_data_text() are meaningful.
use constant {
    CPER_MEM_VALID_NODE => 0x0008,
    CPER_MEM_VALID_CARD => 0x0010,
    CPER_MEM_VALID_MODULE => 0x0020,
    CPER_MEM_VALID_BANK => 0x0040,
    CPER_MEM_VALID_DEVICE => 0x0080,
    CPER_MEM_VALID_ROW => 0x0100,
    CPER_MEM_VALID_COLUMN => 0x0200,
    CPER_MEM_VALID_BIT_POSITION => 0x0400,
    CPER_MEM_VALID_REQUESTOR_ID => 0x0800,
    CPER_MEM_VALID_RESPONDER_ID => 0x1000,
    CPER_MEM_VALID_TARGET_ID => 0x2000,
    CPER_MEM_VALID_ERROR_TYPE => 0x4000,
    CPER_MEM_VALID_RANK_NUMBER => 0x8000,
    CPER_MEM_VALID_CARD_HANDLE => 0x10000,
    CPER_MEM_VALID_MODULE_HANDLE => 0x20000,
};

#
# Decode the binary cper_data column of an extlog event into a
# ", "-separated list of the fields flagged as valid.
# NOTE(review): the requestor, responder and target ids are printed
# without a name, exactly as before; confirm whether that is intended.
#
sub get_cper_data_text
{
    my $cper_data = $_[0];
    my ($validation_bits, $node, $card, $module, $bank, $device, $row, $column, $bit_pos, $requestor_id, $responder_id, $target_id, $rank, $mem_array_handle, $mem_dev_handle) = unpack 'QSSSSSSSSQQQSSS', $cper_data;
    my @out;

    if ($validation_bits & CPER_MEM_VALID_NODE) {
        push @out, (sprintf "node=%d", $node);
    }
    if ($validation_bits & CPER_MEM_VALID_CARD) {
        push @out, (sprintf "card=%d", $card);
    }
    if ($validation_bits & CPER_MEM_VALID_MODULE) {
        push @out, (sprintf "module=%d", $module);
    }
    if ($validation_bits & CPER_MEM_VALID_BANK) {
        push @out, (sprintf "bank=%d", $bank);
    }
    if ($validation_bits & CPER_MEM_VALID_DEVICE) {
        push @out, (sprintf "device=%d", $device);
    }
    if ($validation_bits & CPER_MEM_VALID_ROW) {
        push @out, (sprintf "row=%d", $row);
    }
    if ($validation_bits & CPER_MEM_VALID_COLUMN) {
        push @out, (sprintf "column=%d", $column);
    }
    if ($validation_bits & CPER_MEM_VALID_BIT_POSITION) {
        push @out, (sprintf "bit_position=%d", $bit_pos);
    }
    if ($validation_bits & CPER_MEM_VALID_REQUESTOR_ID) {
        push @out, (sprintf "0x%08x", $requestor_id);
    }
    if ($validation_bits & CPER_MEM_VALID_RESPONDER_ID) {
        push @out, (sprintf "0x%08x", $responder_id);
    }
    if ($validation_bits & CPER_MEM_VALID_TARGET_ID) {
        push @out, (sprintf "0x%08x", $target_id);
    }
    if ($validation_bits & CPER_MEM_VALID_RANK_NUMBER) {
        push @out, (sprintf "rank=%d", $rank);
    }
    if ($validation_bits & CPER_MEM_VALID_CARD_HANDLE) {
        push @out, (sprintf "mem_array_handle=%d", $mem_array_handle);
    }
    if ($validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
        push @out, (sprintf "mem_dev_handle=%d", $mem_dev_handle);
    }

    return join (", ", @out);
}

#
# Format a 16-byte binary UUID as text.  The first three groups are
# byte-swapped (see @le16_table); the remaining bytes keep their order.
#
sub get_uuid_le
{
    my $out = "";
    my @bytes = unpack "C*", $_[0];
    my @le16_table = (3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15);

    for (my $i = 0; $i < 16; $i++) {
        $out .= sprintf "%.2x", $bytes[$le16_table[$i]];
        if ($i == 3 or $i == 5 or $i == 7 or $i == 9) {
            $out .= "-";
        }
    }
    return $out;
}

#
# Open the event database and return the DBI handle.
# Logs an error and exits with -1 when the connection fails.
#
sub open_event_db
{
    require DBI;

    my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "", "", {});
    if (!$dbh) {
        log_error ("Unable to open $dbname: " . (DBI->errstr || "unknown error") . "\n");
        exit -1;
    }
    return $dbh;
}

#
# Prepare and execute $query on $dbh.  Returns the executed statement
# handle, or undef (after logging the reason) when the statement cannot
# be prepared or executed.  $table is only used in the messages.
#
sub query_events
{
    my ($dbh, $table, $query) = @_;

    my $query_handle = $dbh->prepare($query);
    if (!$query_handle) {
        log_error ("$table table missing from $dbname. Run 'rasdaemon --record'.\n");
        return undef;
    }
    if (!$query_handle->execute()) {
        log_error ("Unable to query $table: " . ($query_handle->errstr || "unknown error") . "\n");
        return undef;
    }
    return $query_handle;
}

#
# Print, per event table, how many errors of each kind are stored in
# the database.  Exits with -1 when the database or its mc_event table
# is unavailable; other unavailable tables are reported and skipped.
#
sub summary
{
    my ($query_handle, $out);
    my ($err_type, $label, $mc, $top, $mid, $low, $count, $msg, $action_result);
    my ($etype, $severity, $etype_string, $severity_string);
    my ($affinity, $mpidr);

    my $dbh = open_event_db ();

    # Memory controller mc_event errors
    $query_handle = query_events ($dbh, "mc_event", "select err_type, label, mc, top_layer,middle_layer,lower_layer, count(*) from mc_event group by err_type, label, mc, top_layer, middle_layer, lower_layer");
    exit -1 if (!$query_handle);
    $query_handle->bind_columns(\($err_type, $label, $mc, $top, $mid, $low, $count));
    $out = "";
    while($query_handle->fetch()) {
        $out .= "\t$err_type on DIMM Label(s): '$label' location: $mc:$top:$mid:$low errors: $count\n";
    }
    if ($out ne "") {
        print "Memory controller events summary:\n$out\n";
    } else {
        print "No Memory errors.\n\n";
    }
    $query_handle->finish;

    # PCIe AER aer_event errors
    if ($has_aer == 1) {
        $query_handle = query_events ($dbh, "aer_event", "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg");
        if ($query_handle) {
            $query_handle->bind_columns(\($err_type, $msg, $count));
            $out = "";
            while($query_handle->fetch()) {
                $out .= "\t$count $err_type errors: $msg\n";
            }
            if ($out ne "") {
                print "PCIe AER events summary:\n$out\n";
            } else {
                print "No PCIe AER errors.\n\n";
            }
            $query_handle->finish;
        }
    }

    # ARM processor arm_event errors
    if ($has_arm == 1) {
        $query_handle = query_events ($dbh, "arm_event", "select affinity, mpidr, count(*) from arm_event group by affinity, mpidr");
        if ($query_handle) {
            $query_handle->bind_columns(\($affinity, $mpidr, $count));
            $out = "";
            while($query_handle->fetch()) {
                $out .= "\t$count errors\n";
            }
            if ($out ne "") {
                print "ARM processor events summary:\n$out\n";
            } else {
                print "No ARM processor errors.\n\n";
            }
            $query_handle->finish;
        }
    }

    # extlog errors
    if ($has_extlog == 1) {
        $query_handle = query_events ($dbh, "extlog_event", "select etype, severity, count(*) from extlog_event group by etype, severity");
        if ($query_handle) {
            $query_handle->bind_columns(\($etype, $severity, $count));
            $out = "";
            while($query_handle->fetch()) {
                $etype_string = get_extlog_type($etype);
                $severity_string = get_extlog_severity($severity);
                $out .= "\t$count $etype_string $severity_string errors\n";
            }
            if ($out ne "") {
                print "Extlog records summary:\n$out\n";
            } else {
                print "No Extlog errors.\n\n";
            }
            $query_handle->finish;
        }
    }

    # Memory failure errors
    if ($has_mem_failure == 1) {
        $query_handle = query_events ($dbh, "memory_failure_event", "select action_result, count(*) from memory_failure_event group by action_result");
        if ($query_handle) {
            $query_handle->bind_columns(\($action_result, $count));
            $out = "";
            while($query_handle->fetch()) {
                $out .= "\t$action_result errors: $count\n";
            }
            if ($out ne "") {
                print "Memory failure events summary:\n$out\n";
            } else {
                print "No Memory failure errors.\n\n";
            }
            $query_handle->finish;
        }
    }

    # MCE mce_record errors
    if ($has_mce == 1) {
        $query_handle = query_events ($dbh, "mce_record", "select error_msg, count(*) from mce_record group by error_msg");
        if ($query_handle) {
            $query_handle->bind_columns(\($msg, $count));
            $out = "";
            while($query_handle->fetch()) {
                $out .= "\t$count $msg errors\n";
            }
            if ($out ne "") {
                print "MCE records summary:\n$out";
            } else {
                print "No MCE errors.\n";
            }
            $query_handle->finish;
        }
    }

    $dbh->disconnect;
}

#
# Print every event stored in the database, one line per event, table
# by table.  Exits with -1 when the database or its mc_event table is
# unavailable; other unavailable tables are reported and skipped.
#
sub errors
{
    my ($query_handle, $id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out);
    my ($mcgcap, $mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location);
    my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data);
    my ($error_count, $affinity, $mpidr, $r_state, $psci_state);
    my ($pfn, $page_type, $action_result);

    my $dbh = open_event_db ();

    # Memory controller mc_event errors
    $query_handle = query_events ($dbh, "mc_event", "select id, timestamp, err_count, err_type, err_msg, label, mc, top_layer,middle_layer,lower_layer, address, grain, syndrome, driver_detail from mc_event order by id");
    exit -1 if (!$query_handle);
    $query_handle->bind_columns(\($id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail));
    $out = "";
    while($query_handle->fetch()) {
        $out .= "$id $time $count $type error(s): $msg at $label location: $mc:$top:$mid:$low, addr $addr, grain $grain, syndrome $syndrome $detail\n";
    }
    if ($out ne "") {
        print "Memory controller events:\n$out\n";
    } else {
        print "No Memory errors.\n\n";
    }
    $query_handle->finish;

    # PCIe AER aer_event errors
    if ($has_aer == 1) {
        $query_handle = query_events ($dbh, "aer_event", "select id, timestamp, err_type, err_msg from aer_event order by id");
        if ($query_handle) {
            $query_handle->bind_columns(\($id, $time, $type, $msg));
            $out = "";
            while($query_handle->fetch()) {
                $out .= "$id $time $type error: $msg\n";
            }
            if ($out ne "") {
                print "PCIe AER events:\n$out\n";
            } else {
                print "No PCIe AER errors.\n\n";
            }
            $query_handle->finish;
        }
    }

    # ARM processor arm_event errors
    if ($has_arm == 1) {
        $query_handle = query_events ($dbh, "arm_event", "select id, timestamp, error_count, affinity, mpidr, running_state, psci_state from arm_event order by id");
        if ($query_handle) {
            $query_handle->bind_columns(\($id, $timestamp, $error_count, $affinity, $mpidr, $r_state, $psci_state));
            $out = "";
            while($query_handle->fetch()) {
                $out .= "$id $timestamp error: ";
                $out .= "error_count=$error_count, " if ($error_count);
                $out .= "affinity_level=$affinity, ";
                $out .= sprintf "mpidr=0x%x, ", $mpidr;
                $out .= sprintf "running_state=0x%x, ", $r_state;
                $out .= sprintf "psci_state=0x%x", $psci_state;
                $out .= "\n";
            }
            if ($out ne "") {
                print "ARM processor events:\n$out\n";
            } else {
                print "No ARM processor errors.\n\n";
            }
            $query_handle->finish;
        }
    }

    # Extlog errors
    if ($has_extlog) {
        $query_handle = query_events ($dbh, "extlog_event", "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id");
        if ($query_handle) {
            $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data));
            $out = "";
            while($query_handle->fetch()) {
                $etype_string = get_extlog_type($etype);
                $severity_string = get_extlog_severity($severity);
                $out .= "$id $timestamp error: ";
                $out .= "type=$etype_string, ";
                $out .= "severity=$severity_string, ";
                $out .= sprintf "address=0x%08x, ", $addr;
                $out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id);
                $out .= "fru_text='$fru_text', ";
                $out .= get_cper_data_text($cper_data) if ($cper_data);
                $out .= "\n";
            }
            if ($out ne "") {
                print "Extlog events:\n$out\n";
            } else {
                print "No Extlog errors.\n\n";
            }
            $query_handle->finish;
        }
    }

    # Memory failure errors
    if ($has_mem_failure == 1) {
        $query_handle = query_events ($dbh, "memory_failure_event", "select id, timestamp, pfn, page_type, action_result from memory_failure_event order by id");
        if ($query_handle) {
            $query_handle->bind_columns(\($id, $timestamp, $pfn, $page_type, $action_result));
            $out = "";
            while($query_handle->fetch()) {
                $out .= "$id $timestamp error: ";
                $out .= "pfn=$pfn, page_type=$page_type, action_result=$action_result\n";
            }
            if ($out ne "") {
                print "Memory failure events:\n$out\n";
            } else {
                print "No Memory failure errors.\n\n";
            }
            $query_handle->finish;
        }
    }

    # MCE mce_record errors
    if ($has_mce == 1) {
        $query_handle = query_events ($dbh, "mce_record", "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id");
        if ($query_handle) {
            $query_handle->bind_columns(\($id, $time, $mcgcap, $mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location));
            $out = "";
            while($query_handle->fetch()) {
                $out .= "$id $time error: $msg";
                $out .= ", CPU $cpuvendor" if ($cpuvendor);
                $out .= ", bank $bank_name" if ($bank_name);
                $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg);
                $out .= ", mci $mcistatus_msg" if ($mcistatus_msg);
                $out .= ", $mc_location" if ($mc_location);
                $out .= ", $user_action" if ($user_action);
                $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap);
                $out .= sprintf ", mcgstatus=0x%08x", $mcgstatus if ($mcgstatus);
                $out .= sprintf ", status=0x%08x", $status if ($status);
                $out .= sprintf ", addr=0x%08x", $addr if ($addr);
                $out .= sprintf ", misc=0x%08x", $misc if ($misc);
                $out .= sprintf ", ip=0x%08x", $ip if ($ip);
                $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc);
                $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime);
                $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu);
                $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid);
                $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid);
                $out .= sprintf ", socketid=0x%08x", $socketid if ($socketid);
                $out .= sprintf ", cs=0x%08x", $cs if ($cs);
                $out .= sprintf ", bank=0x%08x", $bank if ($bank);
                $out .= "\n";
            }
            if ($out ne "") {
                print "MCE events:\n$out\n";
            } else {
                print "No MCE errors.\n\n";
            }
            $query_handle->finish;
        }
    }

    $dbh->disconnect;
}

#
# Print a message on STDERR, prefixed with the program name, unless
# --quiet was given.
#
sub log_msg
{
    print STDERR "$prog: ", @_ unless $conf{opt}{quiet};
}

#
# Like log_msg(), with "Error: " in front of the message.
#
sub log_error
{
    log_msg ("Error: @_");
}

# vi: ts=4 sw=4 expandtab