# Source code for pysubgroup.constraints
import pysubgroup as ps
# [docs]
class MinSupportConstraint:
    """
    A constraint that ensures a subgroup has at least a minimum support.

    Attributes:
        min_support (int): The minimum number of instances that a subgroup
            must cover.
    """

    def __init__(self, min_support):
        """
        Initializes the MinSupportConstraint with the specified minimum support.

        Parameters:
            min_support (int): The minimum support required for subgroups.
        """
        self.min_support = min_support

    @property
    def is_monotone(self):
        """
        Indicates whether the constraint is monotone.

        Returns:
            bool: Always True for this constraint.
        """
        return True

    def is_satisfied(self, subgroup, statistics=None, data=None):
        """
        Checks if the subgroup satisfies the minimum support constraint.

        The subgroup size is taken from the first source that provides it:

        1. a ``size_sg`` attribute on ``statistics``;
        2. a ``"size_sg"`` key when ``statistics`` is a dict;
        3. ``ps.get_size(subgroup, len(data), data)``;
        4. if step 3 raises ``AttributeError``, the size accessor installed
           by ``gp_prepare``, applied to ``statistics``.

        Parameters:
            subgroup: The subgroup to be evaluated.
            statistics: Precomputed statistics for the subgroup (optional).
            data: The dataset being analyzed (optional). It is passed to
                ``len`` when the size is not available from ``statistics``.

        Returns:
            bool: True if the subgroup's size is at least the minimum support,
            False otherwise.
        """
        if hasattr(statistics, "size_sg"):
            return statistics.size_sg >= self.min_support
        if isinstance(statistics, dict) and "size_sg" in statistics:
            return statistics["size_sg"] >= self.min_support
        try:
            return ps.get_size(subgroup, len(data), data) >= self.min_support
        except AttributeError:  # Special case for gp_growth algorithm
            # The accessor yields a size, not a verdict: compare it with the
            # threshold like every other branch does. Returning the raw size
            # would accept any subgroup with a non-zero size.
            return self.get_size_sg(statistics) >= self.min_support

    def gp_prepare(self, qf):
        """
        Prepares the constraint for the GP-Growth algorithm by accessing the size
        function.

        Stores ``qf.gp_size_sg`` on the instance as ``get_size_sg``; both
        ``gp_is_satisfied`` and the fallback branch of ``is_satisfied`` call it.

        Parameters:
            qf: The quality function used in the GP-Growth algorithm. It must
                expose a ``gp_size_sg`` attribute.
        """
        self.get_size_sg = qf.gp_size_sg  # pylint: disable=attribute-defined-outside-init

    def gp_is_satisfied(self, node):
        """
        Checks if a node satisfies the constraint in the GP-Growth algorithm.

        ``gp_prepare`` must have been called first so that ``get_size_sg`` is
        available on the instance.

        Parameters:
            node: The node to be evaluated.

        Returns:
            bool: True if the node's size is at least the minimum support,
            False otherwise.
        """
        return self.get_size_sg(node) >= self.min_support
# [docs]
class ContainsValueConstraint:
    """
    A constraint that ensures a subgroup contains in its cover at least one
    instance that has a specified value for a specified attribute.

    Attributes:
        attribute_name: The attribute that needs to contain the specified value
            in at least one instance.
        value: The value that needs to be present in the specified attribute in
            at least one instance.
    """

    def __init__(self, attribute_name, value):
        """
        Initializes the ContainsValueConstraint with the specified value and
        attribute.

        Parameters:
            attribute_name: The attribute that needs to contain the specified
                value in at least one instance.
            value: The value that needs to be present in the specified
                attribute in at least one instance.
        """
        self.attribute_name = attribute_name
        self.value = value

    @property
    def is_monotone(self):
        """
        Indicates whether the constraint is monotone.

        Returns:
            bool: Always True for this constraint.
        """
        return True

    def is_satisfied(self, subgroup, statistics=None, data=None):
        """
        Checks if the subgroup satisfies the constraint.

        Parameters:
            subgroup: The subgroup to be evaluated. Its ``representation``
                attribute is used to index the attribute column.
            statistics: Precomputed statistics for the subgroup (optional,
                not used by this constraint).
            data: The dataset being analyzed. Although the parameter has a
                default of None, it is indexed by ``attribute_name`` and is
                therefore required in practice.

        Returns:
            bool: True if the subgroup's cover contains at least one instance
            that has the specified value for the specified attribute (as
            defined during object construction), False otherwise.
        """
        column = data[self.attribute_name]
        covered_values = column[subgroup.representation]
        # Vectorized existence check; the builtin ``sum`` would walk the
        # comparison result element by element in Python.
        # assumes the indexed column is a pandas/NumPy object exposing
        # ``.any()`` — TODO confirm against callers
        return bool((covered_values == self.value).any())
# [docs]
class MinUniqueValuesConstraint:
    """
    A constraint that ensures a subgroup contains in its cover a minimum number
    of unique values for a specified attribute.

    Attributes:
        attribute_name: The attribute that needs to contain at least the
            specified number of values.
        min_unique_values: The minimum number of unique values that must be
            present in the attribute in a subgroup cover.
    """

    def __init__(self, attribute_name, min_unique_values):
        """
        Initializes the MinUniqueValuesConstraint with the specified attribute
        and minimum number of unique values.

        Parameters:
            attribute_name: The attribute that needs to contain at least the
                specified number of values.
            min_unique_values: The minimum number of unique values that must be
                present in the attribute in a subgroup cover.
        """
        self.attribute_name = attribute_name
        self.min_unique_values = min_unique_values

    @property
    def is_monotone(self):
        """
        Indicates whether the constraint is monotone.

        Returns:
            bool: Always True for this constraint.
        """
        return True

    def is_satisfied(self, subgroup, statistics=None, data=None):
        """
        Checks if the subgroup satisfies the constraint.

        Parameters:
            subgroup: The subgroup to be evaluated. Its ``representation``
                attribute is used to index the attribute column.
            statistics: Precomputed statistics for the subgroup (optional,
                not used by this constraint).
            data: The dataset being analyzed. Although the parameter has a
                default of None, it is indexed by ``attribute_name`` and is
                therefore required in practice.

        Returns:
            bool: True if the subgroup's cover contains the minimum number of
            unique values for the specified attribute (as defined during
            object construction), False otherwise.
        """
        # Pick the column before restricting to the cover: only one column is
        # filtered instead of the whole table, and a non-boolean
        # ``representation`` is not misread as a list of column labels.
        # NOTE(review): confirm which representation types callers pass.
        column = data[self.attribute_name]
        covered_values = column[subgroup.representation]
        return covered_values.nunique() >= self.min_unique_values