public class Cobweb extends Clusterer implements OptionHandler{
static final double norm = 1/Math.sqrt(2*Math.PI);
// Clase Interna
class CTree {
public CTree siblings = null, children = null;
public int size; // Numero de hojas
public int nchildren; // Numero de hijos directos del nodo
public int [][] counts; // Para atributos nominales
public int clusterNum;
public double [][] totals; // Para atributos numericos
public boolean cutoff = false; // Verdadero si el nodo es cortado aqui
public Instance instance = null;
//---- Constructor -------------------------------------------------------
public CTree(Instance i) throws Exception {
instance = i;
size = 1;
nchildren = 0;
counts = makeCounts(); totals = makeTotals();
for (int a = 0; a < instance.numAttributes(); a++)
if (instance.attribute(a).isNominal())
counts[a][(int) instance.value(a)]++;
else {
totals[a][0] = instance.value(a);
totals[a][1] = instance.value(a)*instance.value(a);
}
}
public CTree copyNode() throws Exception {
// Copia valores, no los hijos
CTree copy = new CTree(instance);
copy.size = size;
for (int a = 0; a < instance.numAttributes(); a++)
if (instance.attribute(a).isNominal())
for (int v=0; v < instance.attribute(a).numValues(); v++)
copy.counts[a][v] = counts[a][v];
else for (int v=0; v < 2; v++) copy.totals[a][v] = totals[a][v];
return copy;
}
public void updateStats(CTree node)
throws Exception { // Modifica valores, pero no los hijos
size += node.size;
for (int a = 0; a < instance.numAttributes(); a++)
if (instance.attribute(a).isNominal())
for (int v=0; v < instance.attribute(a).numValues(); v++)
counts[a][v] += node.counts[a][v];
else for (int v=0; v < 2; v++) totals[a][v] += node.totals[a][v];
}
public void downdateStats(CTree node) throws Exception {
size -= node.size;
for (int a = 0; a < instance.numAttributes(); a++)
if (instance.attribute(a).isNominal())
for (int v=0; v < instance.attribute(a).numValues(); v++)
counts[a][v] -= node.counts[a][v];
else for (int v=0; v < 2; v++) totals[a][v] -= node.totals[a][v];
}
public double CU() throws Exception {
// Utilidad categorica
return TU()/nchildren;
}
public double TU() throws Exception {
// Utilidad total de los hijos
return(TU(children));
}
public double TU(CTree node)
throws Exception {
// Utilidad total de nodo + hermanos
double t = 0;
for (CTree n = node; n != null; n = n.siblings)
t += UIndividual(n);
return t;
}
public double UIndividual(CTree node)
throws Exception {
// Utilidad del nodo actual
double s = 0;
for (int a = 0; a < instance.numAttributes(); a++)
if (instance.attribute(a).isNominal())
for (int v=0; v < instance.attribute(a).numValues(); v++) {
double x = (double) node.counts[a][v]/node.size;
double y = (double) counts[a][v]/size;
s += x*x - y*y;
}
else
s += Cobweb.norm/node.sigma(a) - Cobweb.norm/sigma(a);
return(((double) node.size/size) * s);
}
public double sigma(int a) {
double x = totals[a][1] - totals[a][0]*totals[a][0]/size;
if (x <= 0) x = 0; else x = Math.sqrt(x/size);
return Math.max(Cobweb.acuity, x);
}
public double UMerged(CTree n1, CTree n2)
throws Exception {
// Utilidad de n1 unido con n2
n1.updateStats(n2);
double U = UIndividual(n1);
n1.downdateStats(n2);
return U;
}
public void addChildren(CTree node) {
if (node == null); // Hacer nada
else {
CTree n;
nchildren++;
for (n = node; n.siblings != null; n = n.siblings) nchildren++;
n.siblings = children;
children = node;
}
}
public void addChild(CTree node) {
nchildren++;
if (children == null) children = node;
else children.addSibling(node);
}
public void removeChild(CTree node) {
if (children == null) return;
if (children == node) {
children = children.siblings;
node.siblings = null;
nchildren--;
return;
}
for (CTree n = children; n.siblings != null; n = n.siblings)
if (n.siblings == node) {
n.siblings = node.siblings;
node.siblings = null;
nchildren--;
return;
}
}
public void addSibling(CTree node) {
node.siblings = siblings;
siblings = node;
}
private int[][] makeCounts() throws Exception {
int [][] counts = new int[instance.numAttributes()][];
for (int a = 0; a < instance.numAttributes(); a++)
if (instance.attribute(a).isNominal())
counts[a] = new int[instance.attribute(a).numValues()];
else counts[a] = null;
return counts;
}
private double[][] makeTotals() throws Exception {
double [][] totals = new double[instance.numAttributes()][];
for (int a = 0; a < instance.numAttributes(); a++)
if (instance.attribute(a).isNumeric())
totals[a] = new double[2];
else totals[a] = null;
return totals;
}
// Retorna descripcion del arbol
public String toString() {
try {
StringBuffer buf = new StringBuffer("\n");
print(buf, 0);
return buf.toString();
} catch (Exception e) {
e.printStackTrace();
return "No se puede imprimir.";
}
}
private int nChildren() {
int number = 0;
for (CTree n = children; n != null; n = n.siblings) number++;
return number;
}
private int nDescendants() {
int number = 0;
if (children == null) number = 1;
else for (CTree n = children; n != null; n = n.siblings)
number += n.nDescendants();
return number;
}
// Agrega una descripcion del arbol, en un nivel dado
private void print(StringBuffer buf, int level) throws Exception {
if (!cutoff) {
if (nchildren != nChildren())
System.out.println("Problema: nhijos = " + nchildren +
" pero hay "+ nChildren() + " hijos!");
if (size != nDescendants())
System.out.println("Problema: tamaño = " + size +
" pero hay "+ nDescendants() +
" descendientes!");
for (int j=0; j < level; j++) buf.append(" ");
buf.append(size + " | ");
for (int a = 0; a < instance.numAttributes(); a++) {
if (instance.attribute(a).isNominal())
for (int v=0; v < instance.attribute(a).numValues(); v++)
buf.append(((double) counts[a][v]) + " ");
else
buf.append(totals[a][0]/size + // " (" + totals[a][1]/size + ")" +
" " + sigma(a));
buf.append("| ");
}
if (this.nchildren > 0) buf.append(" (" + this.CU() + ")");
buf.append(" ["+clusterNum+"]");
buf.append("\n");
if (children != null) children.print(buf, level+1);
} // Nodos cortados no pueden tener hijos, pero pueden tener hermanos
if (siblings != null) siblings.print(buf, level);
}
private void assignClusterNums(int [] cl_num) throws Exception {
if (!cutoff) {
if (nchildren != nChildren())
System.out.println("Problema: nhijos = " + nchildren +
" pero hay "+ nChildren() + " hijos!");
if (size != nDescendants())
System.out.println("Problema: tamaño = " + size +
" pero hay "+ nDescendants() +
" descendientes!");
this.clusterNum = cl_num[0];
cl_num[0]++;
if (children != null)
children.assignClusterNums(cl_num);
}
if (siblings != null)
siblings.assignClusterNums(cl_num);
}
}
/** acuity */
static double acuity = 1.0;
/c **utoff */
static double cutoff = 0.01 * Cobweb.norm;
/** el arbol cobweb */
private CTree tree = null;
/** numero de clusters */
private int numClusters = -1;
/**
* Agrega ejemplo a arbol.
*
* @param node Nodo a agregar
* @param tree El arbol.
* @exception Exception Si algo sale mal
*/
public void add(CTree node, CTree tree) throws Exception {
if (tree.children == null) {
tree.addChild(tree.copyNode());
tree.addChild(node);
tree.updateStats(node);
}
else {
double T = tree.TU();
int n = tree.nchildren;
double baseU = T/n; // Utilidad base sobre hijos
double U = (tree.UIndividual(node) + T)/(n+1);
CTree host = bestHost(tree, node, U, baseU);
tree.updateStats(node);
if (host == null) tree.addChild(node);
else if (host.children == null) {
host.addChild(host.copyNode());
host.addChild(node);
host.updateStats(node);
if (host.CU() < cutoff) {
for (CTree n1 = host.children; n1 != null; n1 = n1.siblings)
n1.cutoff = true; // markarlos como cortados
return;
}
}
else add(node, host);
}
}
/**
* Encuentra el mejor lugar de agregar nodo
*
* @param tree El arbol.
* @param node El nodo a agregar
* @exception Exception Si algo sale mal
*/
public CTree bestHost(CTree tree, CTree node, double aU, double baseU)
throws Exception {
double oldaU = aU;
CTree a = null; // a es el mejor nodo hasta ahora
CTree b = null; double bU = 0; // b es el segundo mejor
for (CTree n = tree.children; n != null; n = n.siblings)
if (!n.cutoff) {
n.updateStats(node);
double nU = tree.CU();
n.downdateStats(node);
if (nU > bU)
if (nU > aU) { // n se transforma en el mejor, a es el segundo mejor ahora
b = a; a = n;
bU = aU; aU = nU;
}
else { // n es el segundo mejor
b = n;
bU = nU;
}
}
if (a == null) return null;
// Considera union entre mejor y segundo mejor
// utilidad actual es baseU = (a + b + rest)/n
// nueva utilidad sera (c + rest)/(n-1) [c es el nodo unificado]
// unir si (c + rest)/(n-1) > U, ie c - (a+b) + baseU > 0
if (b != null &&
tree.UMerged(a, b) - (tree.UIndividual(a)+tree.UIndividual(b)) + baseU
> 0) {
CTree c = a.copyNode();
c.updateStats(b);
tree.addChild(c);
tree.removeChild(a); tree.removeChild(b);
c.addChild(a); c.addChild(b);
return(c);
}
// c ahora sera el nodo partido
CTree c = a; // call c's children a, b, ...
// utilida actual es baseU = (c + rest)/n
// nueva utilidad sera (a + b + ... + rest)/(n - 1 + nchildren)
// partir si (a + b + ... + rest)/(n - 1 + nchildren) > baseU
// o sea si (a + b + ...) - c > (nchildren - 1)*baseU
if (a.children != null &&
c.TU() - tree.UIndividual(c) > (c.nchildren - 1)*baseU) {
tree.removeChild(c);
tree.addChildren(c.children);
// Now find the best host again
a = null; aU = 0; // a es el mejor nodo hasta ahora
for (CTree n = tree.children; n != null; n = n.siblings) {
n.updateStats(node);
double nU = tree.CU();
n.downdateStats(node);
if (nU > aU) {
a = n;
aU = nU;
}
}
}
return a;
}
/**
* encuentra el cluster de una instancia no vista
*
* @param tree el arbol
* @param node el nodo a agregar
* @exception Exception Si algo sale mal
*/
public CTree bestHostCluster(CTree tree, CTree node, double aU, double baseU)
throws Exception {
double oldaU = aU;
CTree a = null; // a es el mejor nodo
CTree b = null; double bU = 0; // b es el segundo mejor
for (CTree n = tree.children; n != null; n = n.siblings)
if (!n.cutoff) {
n.updateStats(node);
double nU = tree.CU();
n.downdateStats(node);
if (nU > bU)
if (nU > aU) { // n es el mejor ahora, a es el segundo mejor
b = a; a = n;
bU = aU; aU = nU;
}
else { // n es el segundo mejor
b = n;
bU = nU;
}
}
return a;
}
/**
* Construye clusterer
*
* @param data Las instancias de entrenamiento
* @exception Exception Si algo sale mal.
*/
public void buildClusterer(Instances data) throws Exception {
if (data.checkForStringAttributes()) {
throw new Exception("Solo atributos numericos");
}
tree = null;
numClusters = -1;
makeweb(data);
}
/**
* Construye clusters
*
* @param instance La instancia
* @exception Exception Si algo sale mal
*/
public int clusterInstance(Instance instance) throws Exception {
CTree node = new CTree(instance);
double T;
int n;
double baseU; // Utilidad base de nodos hijos
double U;
CTree host = tree;
CTree temp = null;
do {
if (host.children == null) {
temp = null;
}
else {
T = host.TU();
n = host.nchildren;
baseU = T/n; // Utilidad base de nodos hijos
U = (host.UIndividual(node) + T)/(n+1);
temp = bestHostCluster(host, node, U, baseU);
}
if (temp != null) {
host = temp;
}
} while (temp != null);
// System.out.println(host.clusterNum);
return host.clusterNum;
}
/**
* El valor de acuity
* @return El acuity como valor entre 0 and 100
*/
public int getAcuity() {
return (int) (acuity * 100.0);
}
/**
* El valor del cutoff
* @return Cutoff como valor entre 1 and 100
*/
public int getCutoff() {
return (int) (cutoff * 100.0);
}
/**
* Opciones
* @return Una arreglo de strings
*/
public String [] getOptions() {
String [] options = new String [4];
int current = 0;
options[current++] = "-A";
options[current++] = "" + ((int)(acuity * 100.0));
options[current++] = "-C";
options[current++] = "" + ((int)(cutoff * 100.0));
while (current < options.length) {
options[current++] = "";
}
return options;
}
/**
* Muestra opciones actuales
*
* @return Las opciones disponibles
**/
public Enumeration listOptions() {
Vector newVector = new Vector(2);
newVector.addElement(new Option("\tAcuity.\n"
+"\t(default=100)", "A", 1,"-A <0-100%>"));
newVector.addElement(new Option("\tCutoff.\n"
+"a\t(default=0)", "C", 1,"-C <0-100%>"));
return newVector.elements();
}
// Metodo principal
public static void main(String [] argv)
{
try {
System.out.println(ClusterEvaluation.evaluateClusterer(new Cobweb(),
argv));
}
catch (Exception e)
{
System.out.println(e.getMessage());
}
}
private void makeweb(Instances data) throws Exception {
Enumeration e = data.enumerateInstances();
while (e.hasMoreElements()) {
Instance i = (Instance) e.nextElement();
CTree node = new CTree(i);
if (tree == null) {
tree = new CTree(i);
tree.addChild(node);
}
else add(node, tree);
}
if (numClusters == -1)
{
int [] cl_num = new int [1];
cl_num[0] = 0;
tree.assignClusterNums(cl_num);
numClusters = cl_num[0];
}
// System.out.println(tree);
}
/**
* Retorna numero de clusters
*
* @exception Exception Si algo sale mal
*/
public int numberOfClusters() throws Exception {
return numClusters;
}
/**
* Fija el acuity
* @param a El acuity entre 0 and 100
*/
public void setAcuity(int a) {
acuity = (double) a / 100.0;
}
/**
* Fija el cutoff
* @param c cutoff entre 0 and 100
*/
public void setCutoff(int c) {
cutoff = (double) c / 100.0;;
}
/**
* Filtra opciones
*
* @param options Opciones
* @exception Exception Si algo sale mal
*
**/
public void setOptions(String[] options) throws Exception {
String optionString;
optionString = Utils.getOption('A', options);
if (optionString.length() != 0) {
setAcuity(Integer.parseInt(optionString));
}
else {
acuity = 1.0;
}
optionString = Utils.getOption('C', options);
if (optionString.length() != 0) {
setCutoff(Integer.parseInt(optionString));
}
else {
cutoff = 0.01 * Cobweb.norm;
}
}
/**
* Retorna descripcion de los clusters como un string
* @return Un string descriptivo
*/
public String toString() {
if (tree == null) {
return "Cobweb no ha sido construido!";
}
else {
return "Numero de Clusters: "+numClusters+"\n"+tree.toString();
}
}
}
Copyright Nelson Flores 2001.
Departamento de Ciencias de la Computacion, Universidad de Chile.