Source code for pysubgroup.fi_target

"""
Created on 29.09.2017

@author: lemmerfn
"""
from collections import namedtuple
from functools import total_ordering

import pysubgroup as ps


[docs]@total_ordering class FITarget(ps.BaseTarget): statistic_types = ("size_sg", "size_dataset") def __repr__(self): return "T: Frequent Itemsets" def __eq__(self, other): return self.__dict__ == other.__dict__ def __lt__(self, other): return str(self) < str(other)
[docs] def get_attributes(self): return []
[docs] def get_base_statistics(self, subgroup, data): _, size = ps.get_cover_array_and_size(subgroup, len(data), data) return size
[docs] def calculate_statistics(self, subgroup_description, data, cached_statistics=None): if self.all_statistics_present(cached_statistics): return cached_statistics _, size = ps.get_cover_array_and_size(subgroup_description, len(data), data) statistics = {} statistics["size_sg"] = size statistics["size_dataset"] = len(data) return statistics
[docs]class SimpleCountQF(ps.AbstractInterestingnessMeasure): tpl = namedtuple("CountQF_parameters", ("size_sg")) gp_requires_cover_arr = False def __init__(self): self.required_stat_attrs = ("size_sg",) self.has_constant_statistics = True self.size_dataset = None
[docs] def calculate_constant_statistics( self, data, target ): # pylint: disable=unused-argument self.size_dataset = len(data)
[docs] def calculate_statistics( self, subgroup_description, target, data, statistics=None ): # pylint: disable=unused-argument _, size = ps.get_cover_array_and_size( subgroup_description, self.size_dataset, data ) return SimpleCountQF.tpl(size)
[docs] def gp_get_stats(self, _): return {"size_sg": 1}
[docs] def gp_get_null_vector(self): return {"size_sg": 0}
[docs] def gp_merge(self, left, right): left["size_sg"] += right["size_sg"]
[docs] def gp_get_params(self, _cover_arr, v): return SimpleCountQF.tpl(v["size_sg"])
[docs] def gp_to_str(self, stats): return str(stats["size_sg"])
[docs] def gp_size_sg(self, stats): return stats["size_sg"]
[docs]class CountQF(SimpleCountQF, ps.BoundedInterestingnessMeasure):
[docs] def evaluate(self, subgroup, target, data, statistics=None): statistics = self.ensure_statistics(subgroup, target, data, statistics) return statistics.size_sg
[docs] def optimistic_estimate(self, subgroup, target, data, statistics=None): statistics = self.ensure_statistics(subgroup, target, data, statistics) return statistics.size_sg
[docs]class AreaQF(SimpleCountQF):
[docs] def evaluate(self, subgroup, target, data, statistics=None): statistics = self.ensure_statistics(subgroup, target, data, statistics) return statistics.size_sg * subgroup.depth