public class Apriori implements OptionHandler {
/** Soporte minimo. */
private double m_minSupport;
/** Cota inferior para soporte minimo. */
private double m_lowerBoundMinSupport;
/** Confianza minimo */
private double m_minConfidence;
/** Cantidad maxima de reglas inferidas. */
private int m_numRules;
/** Delta para soporte minimo. */
private double m_delta;
/** Nivel de significancia utilizada para test. */
private double m_significanceLevel;
/** Numero de ciclos. */
private int m_cycles;
/** El conjunto de todos los items. */
private FastVector m_Ls;
/** La misma informacion anterior, pero en tablas de hashing. */
private FastVector m_hashtables;
/** Una lista de reglas. */
private FastVector[] m_allTheRules;
/** Las instancias (transacciones) que se usaran. */
private Instances m_instances;
/** Mostrar conjunto de items? */
private boolean m_outputItemSets;
/**
* Constructor
*/
public Apriori() {
resetOptions();
}
/**
* Metodo que construye todos los conjutnos que cumplen con soporte minimo, y
* a partir de esto, todas las reglas que tambien cumplen con confianza minima.
*
* @param instances Instancias
* @exception Exception Si hay problemas
*/
public void buildAssociations(Instances instances) throws Exception {
double[] confidences, supports;
int[] indices;
FastVector[] sortedRuleSet;
if (instances.checkForStringAttributes()) {
throw new Exception("No se puede con atributos de Strings!");
}
// Baja soporte minimo
m_cycles = 0;
m_minSupport = 1.0;
do {
// Reserva espacio para variables
m_Ls = new FastVector();
m_hashtables = new FastVector();
m_allTheRules = new FastVector[3];
m_allTheRules[0] = new FastVector();
m_allTheRules[1] = new FastVector();
m_allTheRules[2] = new FastVector();
sortedRuleSet = new FastVector[3];
sortedRuleSet[0] = new FastVector();
sortedRuleSet[1] = new FastVector();
sortedRuleSet[2] = new FastVector();
// Encuentra grandes conjuntos y reglas
findLargeItemSets(instances);
if (m_significanceLevel != -1)
findRulesBruteForce();
else
findRulesQuickly();
// Ordena reglas segun soporte
supports = new double[m_allTheRules[2].size()];
for (int i = 0; i < m_allTheRules[2].size(); i++)
supports[i] = (double)((ItemSet)m_allTheRules[1].elementAt(i)).support();
indices = Utils.sort(supports);
for (int i = 0; i < m_allTheRules[2].size(); i++) {
sortedRuleSet[0].addElement(m_allTheRules[0].elementAt(indices[i]));
sortedRuleSet[1].addElement(m_allTheRules[1].elementAt(indices[i]));
sortedRuleSet[2].addElement(m_allTheRules[2].elementAt(indices[i]));
}
// Ordena reglas segun confianza
m_allTheRules[0].removeAllElements();
m_allTheRules[1].removeAllElements();
m_allTheRules[2].removeAllElements();
confidences = new double[sortedRuleSet[2].size()];
for (int i = 0; i < sortedRuleSet[2].size(); i++)
confidences[i] = ((Double)sortedRuleSet[2].elementAt(i)).doubleValue();
indices = Utils.sort(confidences);
for (int i = sortedRuleSet[0].size() - 1;
(i >= (sortedRuleSet[0].size() - m_numRules)) && (i >= 0); i--) {
m_allTheRules[0].addElement(sortedRuleSet[0].elementAt(indices[i]));
m_allTheRules[1].addElement(sortedRuleSet[1].elementAt(indices[i]));
m_allTheRules[2].addElement(sortedRuleSet[2].elementAt(indices[i]));
}
m_minSupport -= m_delta;
m_cycles++;
} while ((m_allTheRules[0].size() < m_numRules) &&
(Utils.grOrEq(m_minSupport, m_lowerBoundMinSupport)));
m_minSupport += m_delta;
}
/**
* Encuentra grandes conjuntos a partir de la instancia
*
* @param instances La instancia
* @exception Exception Si un atributo es numerico
*/
private void findLargeItemSets(Instances instances) throws Exception {
FastVector kMinusOneSets, kSets;
Hashtable hashtable;
int necSupport, i = 0;
m_instances = instances;
// Encuentra conjuntos
necSupport = (int)(m_minSupport * (double)instances.numInstances());
kSets = ItemSet.singletons(instances);
ItemSet.upDateCounters(kSets, instances);
kSets = ItemSet.deleteItemSets(kSets, necSupport);
if (kSets.size() == 0)
return;
do {
m_Ls.addElement(kSets);
kMinusOneSets = kSets;
kSets = ItemSet.mergeAllItemSets(kMinusOneSets, i);
hashtable = ItemSet.getHashtable(kMinusOneSets, kMinusOneSets.size());
m_hashtables.addElement(hashtable);
kSets = ItemSet.pruneItemSets(kSets, hashtable);
ItemSet.upDateCounters(kSets, instances);
kSets = ItemSet.deleteItemSets(kSets, necSupport);
i++;
} while (kSets.size() > 0);
}
/**
* Encuentra reglas
*
* @exception Exception Si un atributo es numerico
*/
private void findRulesBruteForce() throws Exception {
FastVector[] rules;
// genera reglas
for (int j = 1; j < m_Ls.size(); j++) {
FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
Enumeration enumItemSets = currentItemSets.elements();
while (enumItemSets.hasMoreElements()) {
ItemSet currentItemSet = (ItemSet)enumItemSets.nextElement();
rules=currentItemSet.generateRulesBruteForce(m_minConfidence,m_hashtables,j+1,
m_instances.numInstances(),
m_significanceLevel);
for (int k = 0; k < rules[0].size(); k++) {
m_allTheRules[0].addElement(rules[0].elementAt(k));
m_allTheRules[1].addElement(rules[1].elementAt(k));
m_allTheRules[2].addElement(rules[2].elementAt(k));
}
}
}
}
/**
* Encuentra todas las reglas
*
* @exception Exception Si un atributo es numerico
*/
private void findRulesQuickly() throws Exception {
FastVector[] rules;
// Genera reglas
for (int j = 1; j < m_Ls.size(); j++) {
FastVector currentItemSets = (FastVector)m_Ls.elementAt(j);
Enumeration enumItemSets = currentItemSets.elements();
while (enumItemSets.hasMoreElements()) {
ItemSet currentItemSet = (ItemSet)enumItemSets.nextElement();
rules = currentItemSet.generateRules(m_minConfidence, m_hashtables, j + 1);
for (int k = 0; k < rules[0].size(); k++) {
m_allTheRules[0].addElement(rules[0].elementAt(k));
m_allTheRules[1].addElement(rules[1].elementAt(k));
m_allTheRules[2].addElement(rules[2].elementAt(k));
}
}
}
}
/**
* Obtiene valor de delta
*
* @return Value El delta.
*/
public double getDelta() {
return m_delta;
}
/**
* Obtiene valor de lowerBoundMinSupport.
*
* @return Valor de lowerBoundMinSupport.
*/
public double getLowerBoundMinSupport() {
return m_lowerBoundMinSupport;
}
/**
* Obtiene valor de minConfidence.
*
* @return Valor de minConfidence.
*/
public double getMinConfidence() {
return m_minConfidence;
}
/**
* Obtiene valor de minSupport.
*
* @return Valor de minSupport.
*/
public double getMinSupport() {
return m_minSupport;
}
/**
* Obtiene valor de numRules.
*
* @return Valor de numRules.
*/
public int getNumRules() {
return m_numRules;
}
/**
* Obtiene opciones de objeto Apriori
*
* @return Arreglo de strings (opciones)
*/
public String [] getOptions() {
String [] options = new String [11];
int current = 0;
if (m_outputItemSets) {
options[current++] = "-I";
}
options[current++] = "-N"; options[current++] = "" + m_numRules;
options[current++] = "-C"; options[current++] = "" + m_minConfidence;
options[current++] = "-D"; options[current++] = "" + m_delta;
options[current++] = "-M"; options[current++] = "" + m_minSupport;
options[current++] = "-S"; options[current++] = "" + m_significanceLevel;
while (current < options.length) {
options[current++] = "";
}
return options;
}
/**
* Obtener el valor de significanceLevel.
*
* @return Valor de significanceLevel.
*/
public double getSignificanceLevel() {
return m_significanceLevel;
}
/**
* Muestra opciones validas
*
* @return an enumeration of all the available options
*/
public Enumeration listOptions() {
String string1 = "\tNumero de reglas requeridas. (default = " + m_numRules + ")",
string2 =
"\tConfianza minima de una regla. (default = " + m_minConfidence + ")",
string3 = "\tEl delta de decrecimiento del soporte minimo\n",
string4 = "\ten cada iteracion. (default = " + m_delta + ")",
string5 =
"\tCota inferior para soporte minimo. (default = " +
m_lowerBoundMinSupport + ")",
string6 = "\tSi utilizado, reglas son probadas por significancia en\n",
string7 = "\tel nivel dado. Mas Lento. (default = no )",
string8 = "\nSi es fijado, se muestran los conjuntos encontrados. (default = no)";
FastVector newVector = new FastVector(6);
newVector.addElement(new Option(string1, "N", 1,
"-N <Numero de reglas requeridas>"));
newVector.addElement(new Option(string2, "C", 1,
"-C <Confianza minima de regla>"));
newVector.addElement(new Option(string3 + string4, "D", 1,
"-D <delta de soporte minimo>"));
newVector.addElement(new Option(string5, "M", 1,
"-M <Cota inferior para soporte minimo>"));
newVector.addElement(new Option(string6 + string7, "S", 1,
"-S <Nivel de significancia>"));
newVector.addElement(new Option(string8, "S", 0,
"-I"));
return newVector.elements();
}
/**
* Main
*/
public static void main(String[] options) {
String trainFileString;
StringBuffer text = new StringBuffer();
Apriori apriori = new Apriori();
Reader reader;
try {
text.append("\n\nApriori opciones:\n\n");
text.append("-t <archivo de entrenamiento>\n");
text.append("\tNombre de archivo.\n");
Enumeration enum = apriori.listOptions();
while (enum.hasMoreElements()) {
Option option = (Option)enum.nextElement();
text.append(option.synopsis()+'\n');
text.append(option.description()+'\n');
}
trainFileString = Utils.getOption('t', options);
if (trainFileString.length() == 0)
throw new Exception("No se especifico archivo de entrenamiento!");
apriori.setOptions(options);
reader = new BufferedReader(new FileReader(trainFileString));
apriori.buildAssociations(new Instances(reader));
System.out.println(apriori);
} catch(Exception e) {
System.out.println("\n"+e.getMessage()+text);
}
}
/**
* Resetea los valores
*/
public void resetOptions() {
m_delta = 0.05;
m_minConfidence = 0.90;
m_numRules = 10;
m_lowerBoundMinSupport = 0.1;
m_significanceLevel = -1;
m_outputItemSets = false;
}
/**
* Fija el valor del delta
*
* @param v Valor a asignar a delta.
*/
public void setDelta(double v) {
m_delta = v;
}
/**
* Fija el valor de lowerBoundMinSupport.
*
* @param v Valor a asignar a lowerBoundMinSupport.
*/
public void setLowerBoundMinSupport(double v) {
m_lowerBoundMinSupport = v;
}
/**
* Fija el valor de minConfidence.
*
* @param v Valor a asignar a minConfidence.
*/
public void setMinConfidence(double v) {
m_minConfidence = v;
}
/**
* Fija el valor de minSupport.
*
* @param v Valor a asignar a minSupport.
*/
public void setMinSupport(double v) {
m_minSupport = v;
}
/**
* Fija el valor de numRules.
*
* @param v Valor a asignar a numRules.
*/
public void setNumRules(int v) {
m_numRules = v;
}
/**
* Filtra las opciones:<p>
*
* @param options Una lista de opciones, como arreglo de strings
* @exception Exception Si hay problemas con una opcion
*/
public void setOptions(String[] options) throws Exception{
resetOptions();
String numRulesString = Utils.getOption('N', options),
minConfidenceString = Utils.getOption('C', options),
deltaString = Utils.getOption('D', options),
minSupportString = Utils.getOption('M', options),
significanceLevelString = Utils.getOption('S', options);
if (numRulesString.length() != 0) {
m_numRules = Integer.parseInt(numRulesString);
}
if (minConfidenceString.length() != 0) {
m_minConfidence = (new Double(minConfidenceString)).doubleValue();
}
if (deltaString.length() != 0) {
m_delta = (new Double(deltaString)).doubleValue();
}
if (minSupportString.length() != 0) {
m_lowerBoundMinSupport = (new Double(minSupportString)).doubleValue();
}
if (significanceLevelString.length() != 0) {
m_significanceLevel = (new Double(significanceLevelString)).doubleValue();
}
m_outputItemSets = Utils.getFlag('I', options);
}
/**
* Fija el valor de significanceLevel.
*
* @param v Valor a asignar a significanceLevel.
*/
public void setSignificanceLevel(double v) {
m_significanceLevel = v;
}
/**
* Muestra los conjuntos y las reglas generadas
*/
public String toString() {
StringBuffer text = new StringBuffer();
if (m_Ls.size() <= 1)
return "\nNo se encontraron conjuntos o Reglas\n";
text.append("\nApriori\n=======\n");
text.append("Soporte Minimo: " + Utils.doubleToString(m_minSupport,2) + '\n');
text.append("Confianza Minima: "+Utils.doubleToString(m_minConfidence,2)+'\n');
if (m_significanceLevel != -1)
text.append("Nivel de Significancia: "+
Utils.doubleToString(m_significanceLevel,2)+'\n');
text.append("Numero de Ciclos: " + m_cycles+'\n');
text.append("\nGrandes cjtos de Itemes Generados:\n");
for (int i = 0; i < m_Ls.size(); i++) {
text.append("\nTamaño de Cjto. L("+(i+1)+"): "+
((FastVector)m_Ls.elementAt(i)).size()+'\n');
if (m_outputItemSets) {
text.append("\nConjunto L("+(i+1)+"):\n");
for (int j = 0; j < ((FastVector)m_Ls.elementAt(i)).size(); j++)
text.append(((ItemSet)((FastVector)m_Ls.elementAt(i)).elementAt(j)).
toString(m_instances)+"\n");
}
}
text.append("\nMejores Reglas Encontradas:\n\n");
for (int i = 0; i < m_allTheRules[0].size(); i++)
text.append(Utils.doubleToString((double)i+1,
(int)(Math.log(m_numRules)/Math.log(10)+1),0)+
". " + ((ItemSet)m_allTheRules[0].elementAt(i)).
toString(m_instances)
+ " ==> " + ((ItemSet)m_allTheRules[1].elementAt(i)).
toString(m_instances) +" ("+
Utils.doubleToString(((Double)m_allTheRules[2].
elementAt(i)).doubleValue(),2)+")\n");
return text.toString();
}
}
Copyright Nelson Flores 2001.
Departamento de Ciencias de la Computacion, Universidad de Chile.