Previous Next

8.3 Anexo 3. Algoritmo Cobweb

 

 

public class Cobweb extends Clusterer implements OptionHandler{

static final double norm = 1/Math.sqrt(2*Math.PI);

// Clase Interna

class CTree {

public CTree siblings = null, children = null;

public int size; // Numero de hojas

public int nchildren; // Numero de hijos directos del nodo

public int [][] counts; // Para atributos nominales

public int clusterNum;

public double [][] totals; // Para atributos numericos

public boolean cutoff = false; // Verdadero si el nodo es cortado aqui

public Instance instance = null;

 

//---- Constructor -------------------------------------------------------

public CTree(Instance i) throws Exception {

instance = i;

size = 1;

nchildren = 0;

counts = makeCounts(); totals = makeTotals();

for (int a = 0; a < instance.numAttributes(); a++)

if (instance.attribute(a).isNominal())

counts[a][(int) instance.value(a)]++;

else {

totals[a][0] = instance.value(a);

totals[a][1] = instance.value(a)*instance.value(a);

}

}

public CTree copyNode() throws Exception {

// Copia valores, no los hijos

CTree copy = new CTree(instance);

copy.size = size;

for (int a = 0; a < instance.numAttributes(); a++)

if (instance.attribute(a).isNominal())

for (int v=0; v < instance.attribute(a).numValues(); v++)

copy.counts[a][v] = counts[a][v];

else for (int v=0; v < 2; v++) copy.totals[a][v] = totals[a][v];

return copy;

}

public void updateStats(CTree node)

throws Exception { // Modifica valores, pero no los hijos

size += node.size;

for (int a = 0; a < instance.numAttributes(); a++)

if (instance.attribute(a).isNominal())

for (int v=0; v < instance.attribute(a).numValues(); v++)

counts[a][v] += node.counts[a][v];

else for (int v=0; v < 2; v++) totals[a][v] += node.totals[a][v];

}

 

public void downdateStats(CTree node) throws Exception {

size -= node.size;

for (int a = 0; a < instance.numAttributes(); a++)

if (instance.attribute(a).isNominal())

for (int v=0; v < instance.attribute(a).numValues(); v++)

counts[a][v] -= node.counts[a][v];

else for (int v=0; v < 2; v++) totals[a][v] -= node.totals[a][v];

}

 

public double CU() throws Exception {

// Utilidad categorica

 

return TU()/nchildren;

}

public double TU() throws Exception {

// Utilidad total de los hijos

 

return(TU(children));

}

public double TU(CTree node)

throws Exception {

// Utilidad total de nodo + hermanos

 

double t = 0;

for (CTree n = node; n != null; n = n.siblings)

t += UIndividual(n);

return t;

}

public double UIndividual(CTree node)

throws Exception {

// Utilidad del nodo actual

 

double s = 0;

for (int a = 0; a < instance.numAttributes(); a++)

if (instance.attribute(a).isNominal())

for (int v=0; v < instance.attribute(a).numValues(); v++) {

double x = (double) node.counts[a][v]/node.size;

double y = (double) counts[a][v]/size;

s += x*x - y*y;

}

else

s += Cobweb.norm/node.sigma(a) - Cobweb.norm/sigma(a);

return(((double) node.size/size) * s);

}

public double sigma(int a) {

double x = totals[a][1] - totals[a][0]*totals[a][0]/size;

if (x <= 0) x = 0; else x = Math.sqrt(x/size);

return Math.max(Cobweb.acuity, x);

}

public double UMerged(CTree n1, CTree n2)

throws Exception {

// Utilidad de n1 unido con n2

n1.updateStats(n2);

double U = UIndividual(n1);

n1.downdateStats(n2);

return U;

}

public void addChildren(CTree node) {

if (node == null); // Hacer nada

else {

CTree n;

nchildren++;

for (n = node; n.siblings != null; n = n.siblings) nchildren++;

n.siblings = children;

children = node;

}

}

public void addChild(CTree node) {

nchildren++;

if (children == null) children = node;

else children.addSibling(node);

}

public void removeChild(CTree node) {

if (children == null) return;

if (children == node) {

children = children.siblings;

node.siblings = null;

nchildren--;

return;

}

for (CTree n = children; n.siblings != null; n = n.siblings)

if (n.siblings == node) {

n.siblings = node.siblings;

node.siblings = null;

nchildren--;

return;

}

}

public void addSibling(CTree node) {

node.siblings = siblings;

siblings = node;

}

private int[][] makeCounts() throws Exception {

int [][] counts = new int[instance.numAttributes()][];

for (int a = 0; a < instance.numAttributes(); a++)

if (instance.attribute(a).isNominal())

counts[a] = new int[instance.attribute(a).numValues()];

else counts[a] = null;

return counts;

}

 

private double[][] makeTotals() throws Exception {

double [][] totals = new double[instance.numAttributes()][];

for (int a = 0; a < instance.numAttributes(); a++)

if (instance.attribute(a).isNumeric())

totals[a] = new double[2];

else totals[a] = null;

return totals;

}

// Retorna descripcion del arbol

public String toString() {

try {

StringBuffer buf = new StringBuffer("\n");

print(buf, 0);

return buf.toString();

} catch (Exception e) {

e.printStackTrace();

return "No se puede imprimir.";

}

}

private int nChildren() {

int number = 0;

for (CTree n = children; n != null; n = n.siblings) number++;

return number;

}

private int nDescendants() {

int number = 0;

if (children == null) number = 1;

else for (CTree n = children; n != null; n = n.siblings)

number += n.nDescendants();

return number;

}

// Agrega una descripcion del arbol, en un nivel dado

private void print(StringBuffer buf, int level) throws Exception {

if (!cutoff) {

if (nchildren != nChildren())

System.out.println("Problema: nhijos = " + nchildren +

" pero hay "+ nChildren() + " hijos!");

if (size != nDescendants())

System.out.println("Problema: tamaño = " + size +

" pero hay "+ nDescendants() +

" descendientes!");

for (int j=0; j < level; j++) buf.append(" ");

buf.append(size + " | ");

for (int a = 0; a < instance.numAttributes(); a++) {

if (instance.attribute(a).isNominal())

for (int v=0; v < instance.attribute(a).numValues(); v++)

buf.append(((double) counts[a][v]) + " ");

else

buf.append(totals[a][0]/size + // " (" + totals[a][1]/size + ")" +

" " + sigma(a));

buf.append("| ");

}

if (this.nchildren > 0) buf.append(" (" + this.CU() + ")");

buf.append(" ["+clusterNum+"]");

buf.append("\n");

if (children != null) children.print(buf, level+1);

} // Nodos cortados no pueden tener hijos, pero pueden tener hermanos

if (siblings != null) siblings.print(buf, level);

}

private void assignClusterNums(int [] cl_num) throws Exception {

if (!cutoff) {

if (nchildren != nChildren())

System.out.println("Problema: nhijos = " + nchildren +

" pero hay "+ nChildren() + " hijos!");

if (size != nDescendants())

System.out.println("Problema: tamaño = " + size +

" pero hay "+ nDescendants() +

" descendientes!");

 

this.clusterNum = cl_num[0];

cl_num[0]++;

if (children != null)

children.assignClusterNums(cl_num);

}

 

if (siblings != null)

siblings.assignClusterNums(cl_num);

}

}

/** acuity */

static double acuity = 1.0;

 

/c **utoff */

static double cutoff = 0.01 * Cobweb.norm;

 

/** el arbol cobweb */

private CTree tree = null;

/** numero de clusters */

private int numClusters = -1;

/**

* Agrega ejemplo a arbol.

*

* @param node Nodo a agregar

* @param tree El arbol.

* @exception Exception Si algo sale mal

*/

public void add(CTree node, CTree tree) throws Exception {

if (tree.children == null) {

tree.addChild(tree.copyNode());

tree.addChild(node);

tree.updateStats(node);

}

else {

double T = tree.TU();

int n = tree.nchildren;

double baseU = T/n; // Utilidad base sobre hijos

double U = (tree.UIndividual(node) + T)/(n+1);

CTree host = bestHost(tree, node, U, baseU);

tree.updateStats(node);

if (host == null) tree.addChild(node);

else if (host.children == null) {

host.addChild(host.copyNode());

host.addChild(node);

host.updateStats(node);

if (host.CU() < cutoff) {

for (CTree n1 = host.children; n1 != null; n1 = n1.siblings)

n1.cutoff = true; // markarlos como cortados

return;

}

}

else add(node, host);

}

}

/**

* Encuentra el mejor lugar de agregar nodo

*

* @param tree El arbol.

* @param node El nodo a agregar

* @exception Exception Si algo sale mal

*/

public CTree bestHost(CTree tree, CTree node, double aU, double baseU)

throws Exception {

double oldaU = aU;

CTree a = null; // a es el mejor nodo hasta ahora

CTree b = null; double bU = 0; // b es el segundo mejor

for (CTree n = tree.children; n != null; n = n.siblings)

if (!n.cutoff) {

n.updateStats(node);

double nU = tree.CU();

n.downdateStats(node);

if (nU > bU)

if (nU > aU) { // n se transforma en el mejor, a es el segundo mejor ahora

b = a; a = n;

bU = aU; aU = nU;

}

else { // n es el segundo mejor

b = n;

bU = nU;

}

}

if (a == null) return null;

// Considera union entre mejor y segundo mejor

// utilidad actual es baseU = (a + b + rest)/n

// nueva utilidad sera (c + rest)/(n-1) [c es el nodo unificado]

// unir si (c + rest)/(n-1) > U, ie c - (a+b) + baseU > 0

if (b != null &&

tree.UMerged(a, b) - (tree.UIndividual(a)+tree.UIndividual(b)) + baseU

> 0) {

CTree c = a.copyNode();

c.updateStats(b);

tree.addChild(c);

tree.removeChild(a); tree.removeChild(b);

c.addChild(a); c.addChild(b);

return(c);

}

// c ahora sera el nodo partido

CTree c = a; // call c's children a, b, ...

// utilida actual es baseU = (c + rest)/n

// nueva utilidad sera (a + b + ... + rest)/(n - 1 + nchildren)

// partir si (a + b + ... + rest)/(n - 1 + nchildren) > baseU

// o sea si (a + b + ...) - c > (nchildren - 1)*baseU

if (a.children != null &&

c.TU() - tree.UIndividual(c) > (c.nchildren - 1)*baseU) {

tree.removeChild(c);

tree.addChildren(c.children);

// Now find the best host again

a = null; aU = 0; // a es el mejor nodo hasta ahora

for (CTree n = tree.children; n != null; n = n.siblings) {

n.updateStats(node);

double nU = tree.CU();

n.downdateStats(node);

if (nU > aU) {

a = n;

aU = nU;

}

}

}

return a;

}

/**

* encuentra el cluster de una instancia no vista

*

* @param tree el arbol

* @param node el nodo a agregar

* @exception Exception Si algo sale mal

*/

public CTree bestHostCluster(CTree tree, CTree node, double aU, double baseU)

throws Exception {

double oldaU = aU;

CTree a = null; // a es el mejor nodo

CTree b = null; double bU = 0; // b es el segundo mejor

for (CTree n = tree.children; n != null; n = n.siblings)

if (!n.cutoff) {

n.updateStats(node);

double nU = tree.CU();

n.downdateStats(node);

if (nU > bU)

if (nU > aU) { // n es el mejor ahora, a es el segundo mejor

b = a; a = n;

bU = aU; aU = nU;

}

else { // n es el segundo mejor

b = n;

bU = nU;

}

}

return a;

}

/**

* Construye clusterer

*

* @param data Las instancias de entrenamiento

* @exception Exception Si algo sale mal.

*/

public void buildClusterer(Instances data) throws Exception {

if (data.checkForStringAttributes()) {

throw new Exception("Solo atributos numericos");

}

tree = null;

numClusters = -1;

makeweb(data);

}

/**

* Construye clusters

*

* @param instance La instancia

* @exception Exception Si algo sale mal

*/

public int clusterInstance(Instance instance) throws Exception {

CTree node = new CTree(instance);

 

double T;

int n;

double baseU; // Utilidad base de nodos hijos

double U;

CTree host = tree;

CTree temp = null;

do {

if (host.children == null) {

temp = null;

}

else {

T = host.TU();

n = host.nchildren;

baseU = T/n; // Utilidad base de nodos hijos

U = (host.UIndividual(node) + T)/(n+1);

temp = bestHostCluster(host, node, U, baseU);

}

if (temp != null) {

host = temp;

}

} while (temp != null);

// System.out.println(host.clusterNum);

return host.clusterNum;

}

/**

* El valor de acuity

* @return El acuity como valor entre 0 and 100

*/

public int getAcuity() {

return (int) (acuity * 100.0);

}

/**

* El valor del cutoff

* @return Cutoff como valor entre 1 and 100

*/

public int getCutoff() {

return (int) (cutoff * 100.0);

}

/**

* Opciones

* @return Una arreglo de strings

*/

public String [] getOptions() {

 

String [] options = new String [4];

int current = 0;

options[current++] = "-A";

options[current++] = "" + ((int)(acuity * 100.0));

options[current++] = "-C";

options[current++] = "" + ((int)(cutoff * 100.0));

while (current < options.length) {

options[current++] = "";

}

return options;

}

/**

* Muestra opciones actuales

*

* @return Las opciones disponibles

**/

public Enumeration listOptions() {

 

Vector newVector = new Vector(2);

 

newVector.addElement(new Option("\tAcuity.\n"

+"\t(default=100)", "A", 1,"-A <0-100%>"));

newVector.addElement(new Option("\tCutoff.\n"

+"a\t(default=0)", "C", 1,"-C <0-100%>"));

 

return newVector.elements();

}

// Metodo principal

public static void main(String [] argv)

{

try {

System.out.println(ClusterEvaluation.evaluateClusterer(new Cobweb(),

argv));

}

catch (Exception e)

{

System.out.println(e.getMessage());

}

}

private void makeweb(Instances data) throws Exception {

Enumeration e = data.enumerateInstances();

while (e.hasMoreElements()) {

Instance i = (Instance) e.nextElement();

CTree node = new CTree(i);

if (tree == null) {

tree = new CTree(i);

tree.addChild(node);

}

else add(node, tree);

}

if (numClusters == -1)

{

int [] cl_num = new int [1];

cl_num[0] = 0;

tree.assignClusterNums(cl_num);

numClusters = cl_num[0];

}

 

// System.out.println(tree);

}

/**

* Retorna numero de clusters

*

* @exception Exception Si algo sale mal

*/

public int numberOfClusters() throws Exception {

return numClusters;

}

/**

* Fija el acuity

* @param a El acuity entre 0 and 100

*/

public void setAcuity(int a) {

acuity = (double) a / 100.0;

}

/**

* Fija el cutoff

* @param c cutoff entre 0 and 100

*/

public void setCutoff(int c) {

cutoff = (double) c / 100.0;;

}

/**

* Filtra opciones

*

* @param options Opciones

* @exception Exception Si algo sale mal

*

**/

public void setOptions(String[] options) throws Exception {

String optionString;

optionString = Utils.getOption('A', options);

if (optionString.length() != 0) {

setAcuity(Integer.parseInt(optionString));

}

else {

acuity = 1.0;

}

optionString = Utils.getOption('C', options);

if (optionString.length() != 0) {

setCutoff(Integer.parseInt(optionString));

}

else {

cutoff = 0.01 * Cobweb.norm;

}

}

/**

* Retorna descripcion de los clusters como un string

* @return Un string descriptivo

*/

public String toString() {

if (tree == null) {

return "Cobweb no ha sido construido!";

}

else {

return "Numero de Clusters: "+numClusters+"\n"+tree.toString();

}

}

}

 

 


Previous Next

 

Copyright Nelson Flores 2001.

Departamento de Ciencias de la Computacion, Universidad de Chile.