Source code for latools.filtering.filt_obj

"""
An object used for storing, manipulating and modifying data filters.
"""

import re
import numpy as np
import pandas as pd
from difflib import SequenceMatcher as seqm
from latools.helpers.helpers import bool_2_indices, Bunch

class filt(object):
    """
    Container for creating, storing and selecting data filters.

    Parameters
    ----------
    size : int
        The length that the filters need to be (should be the same as
        your data).
    analytes : array_like
        A list of the analytes measured in your data.

    Attributes
    ----------
    size : int
        The length that the filters need to be.
    analytes : array_like
        A list of the analytes measured in your data.
    maxset : int
        The highest filter set number used so far (-1 when no filter
        has been added).
    fnames : list of str
        One ``'<set number>:<name>'`` entry per added filter.
    filter_table : pandas.DataFrame
        On/off switches. Rows are indexed by ``(N, filter)``, with one
        column per analyte.
    filter_components : pandas.DataFrame
        The filter arrays themselves: `size` rows and one column per
        ``(N, filter)``.
    param : Bunch
        Parameters used to create each filter, keyed by set number.
    info : Bunch
        Descriptive information about each filter, keyed by set number.
    keydict : Bunch
        Logical filter expression for each analyte, filled in by
        `make_keydict`.
    N : int
        Initialised to 0; no method of this class updates it.
    """

    def __init__(self, size, analytes):
        self.size = size
        self.analytes = analytes
        self.maxset = -1

        findex = pd.MultiIndex(levels=[[], []], codes=[[], []],
                               names=['N', 'filter'])
        self.fnames = []
        self.filter_table = pd.DataFrame(index=findex,
                                         columns=self.analytes)
        self.filter_components = pd.DataFrame(index=np.arange(size),
                                              columns=findex)

        self.param = Bunch()
        self.info = Bunch()
        self.keydict = Bunch()
        self.N = 0

    def add(self, name, filt, info='', params=(), setn=None):
        """
        Add filter.

        Parameters
        ----------
        name : str
            filter name
        filt : array_like
            boolean filter array
        info : str
            informative description of the filter
        params : tuple
            parameters used to make the filter
        setn : int
            the set number of the filter. Defaults to one more than the
            highest set number used so far.

        Returns
        -------
        None
        """
        if setn is None:
            setn = self.maxset + 1
        # Never let maxset decrease: otherwise a later automatic set
        # number could collide with an existing set.
        self.maxset = max(self.maxset, setn)

        # store params and info
        self.param[setn] = params
        self.info[setn] = info

        # store switches (off for every analyte by default) and filter
        self.filter_table.loc[(setn, name), :] = False
        self.filter_components.loc[:, (setn, name)] = filt
        self.fnames.append(f'{setn}:{name}')

    def remove(self, name=None, setn=None):
        """
        Remove filter (no longer supported; always raises).

        Parameters
        ----------
        name : str
            name of the filter to remove
        setn : int or True
            int: number of set to remove
            True: remove all filters in set that 'name' belongs to

        Raises
        ------
        DeprecationWarning
            Always.
        """
        raise DeprecationWarning('This no longer works. Use `.filter_clear()` instead, then re-run the filters you want to keep.')

    def clear(self):
        """
        Clear all filters.

        Re-runs `__init__` with the current `size` and `analytes`.
        """
        self.__init__(self.size, self.analytes)

    def clean(self):
        """
        No longer supported; always raises DeprecationWarning.
        """
        raise DeprecationWarning('This no longer works.')

    def _resolve_filters(self, filt):
        """
        Translate a filter specification into ``(set number, name)`` keys.

        Parameters
        ----------
        filt : None, str, int or iterable of these
            None selects every filter, a str is fuzzy-matched against
            the filter names, and an int selects every filter in that
            set.

        Returns
        -------
        list of (int, str) tuples

        Raises
        ------
        KeyError
            If an int is given and no filter belongs to that set.
        """
        if isinstance(filt, str):
            return [self.fuzzmatch(filt, multi=True)]

        known = []
        for fname in dict.fromkeys(self.fnames):
            n, name = fname.split(':', 1)
            known.append((int(n), name))

        if filt is None:
            return known
        if isinstance(filt, (int, np.integer)):
            rows = [row for row in known if row[0] == filt]
            if not rows:
                raise KeyError(f'No filters found in set {filt}.')
            return rows

        rows = []
        for item in filt:
            rows.extend(self._resolve_filters(item))
        return rows

    def _set_switch(self, analyte, filt, state):
        """
        Set the switches of the selected filters and analytes to `state`.
        """
        if isinstance(analyte, str):
            analyte = [analyte]
        elif analyte is None:
            analyte = self.analytes

        for row in self._resolve_filters(filt):
            self.filter_table.loc[row, analyte] = state

    def on(self, analyte=None, filt=None):
        """
        Turn on specified filter(s) for specified analyte(s).

        Parameters
        ----------
        analyte : optional, str or array_like
            Name or list of names of analytes. Defaults to all analytes.
        filt : optional. int, str or array_like
            A (partial) filter name, a set number (selects every filter
            in that set), or an iterable of these. Defaults to all
            filters.

        Returns
        -------
        None
        """
        self._set_switch(analyte, filt, True)

    def off(self, analyte=None, filt=None):
        """
        Turn off specified filter(s) for specified analyte(s).

        Parameters
        ----------
        analyte : optional, str or array_like
            Name or list of names of analytes. Defaults to all analytes.
        filt : optional. int, str or array_like
            A (partial) filter name, a set number (selects every filter
            in that set), or an iterable of these. Defaults to all
            filters.

        Returns
        -------
        None
        """
        self._set_switch(analyte, filt, False)

    def fuzzmatch(self, fuzzkey, multi=True):
        """
        Identify a filter by fuzzy string matching.

        Partial ('fuzzy') matching is performed with
        `difflib.SequenceMatcher.ratio` against the
        ``'<set number>:<name>'`` entries in `fnames`.

        Parameters
        ----------
        fuzzkey : str
            A string that partially matches one filter name more than
            the others.
        multi : bool
            If True (default), ties between equally good matches are
            resolved by taking the first one. If False, a tie raises
            ValueError.

        Returns
        -------
        (int, str)
            The set number and name of the most closely matched filter.

        Raises
        ------
        ValueError
            If no filters exist, or if `multi` is False and several
            filter names match equally well.
        """
        if not self.fnames:
            raise ValueError(
                "Cannot match filter key ('{:}'): no filters have been "
                "added.".format(fuzzkey))

        # Keep the ratios as floats; packing them into one array with
        # the names would turn them into strings.
        ratios = [seqm(None, fuzzkey, f).ratio() for f in self.fnames]
        mratio = max(ratios)
        # dict.fromkeys drops repeated names while preserving order, so
        # a filter added twice is not reported as ambiguous.
        matches = list(dict.fromkeys(
            f for f, r in zip(self.fnames, ratios) if r == mratio))

        if not multi and len(matches) > 1:
            raise ValueError("\nThe filter key provided ('{:}') matches two or more filter names equally well:\n".format(fuzzkey) + ', '.join(matches) + "\nBe more specific, or prepend the sequence number?")

        # Split on the first colon only, so names may contain ':'.
        n, name = matches[0].split(':', 1)
        return int(n), name

    def make_analyte(self, analyte):
        """
        Make filter for specified analyte(s).

        Combines (logical AND) every filter that is switched on in
        `filter_table` for at least one of the given analytes.

        Parameters
        ----------
        analyte : str or array_like
            Name or list of names of analytes. None selects all
            analytes.

        Returns
        -------
        array_like
            boolean filter
        """
        if isinstance(analyte, str):
            analyte = [analyte]
        elif analyte is None:
            analyte = self.analytes

        switches = self.filter_table[analyte]
        # axis must be passed by keyword in current pandas.
        active = switches.index[switches.any(axis=1)]
        key = [f'{n}:{f}' for n, f in active]
        return self.make_fromkey('&'.join(key))

    def make_fromkey(self, key):
        """
        Make filter from logical expression.

        Takes a logical expression as an input, and returns a filter.
        Used for advanced filtering, where combinations of nested and/or
        filters are desired. Each filter name in the expression is
        resolved with `fuzzmatch`.

        Example: ``key = '(Filter_1 | Filter_2) & Filter_3'``
        is equivalent to:
        ``(Filter_1 OR Filter_2) AND Filter_3``
        statements in parentheses are evaluated first.

        Parameters
        ----------
        key : str
            logical expression describing filter construction. An empty
            string gives a filter that is True everywhere.

        Returns
        -------
        array_like
            boolean filter
        """
        if key == '':
            return np.ones(self.size, dtype=bool)

        components = {}

        def substitute(match):
            # Swap each filter name for a placeholder bound to its
            # filter column.
            placeholder = f'_f{len(components)}'
            fkey = self.fuzzmatch(match.group(0))
            components[placeholder] = self.filter_components.loc[:, fkey]
            return placeholder

        runable = re.sub(r'[^()|& ]+', substitute, key)
        # NOTE: eval is retained to parse the logical expression. After
        # substitution it only contains placeholders, parentheses, '|',
        # '&' and spaces, and it runs without builtins.
        return eval(runable, {'__builtins__': {}}, components).values

    def make_keydict(self, analyte=None):
        """
        Make logical expressions describing the filter(s) for specified
        analyte(s).

        The expressions are stored in `keydict` and also returned.

        Parameters
        ----------
        analyte : optional, str or array_like
            Name or list of names of analytes. Defaults to all analytes.

        Returns
        -------
        dict
            containing the logical filter expression for each requested
            analyte.
        """
        if isinstance(analyte, str):
            analyte = [analyte]
        elif analyte is None:
            analyte = self.analytes

        out = {}
        for a in analyte:
            switches = self.filter_table[a]
            key = [f'{n}:{f}' for n, f in switches.index[switches]]
            self.keydict[a] = ' & '.join(key)
            out[a] = self.keydict[a]
        return out

    def grab_filt(self, filt, analyte=None):
        """
        Flexible access to specific filter using any key format.

        Parameters
        ----------
        filt : str, dict or bool
            either logical filter expression, dict of expressions,
            or a boolean
        analyte : str
            name of analyte the filter is for.

        Returns
        -------
        array_like
            boolean filter. When `filt` exactly names a filter that is
            switched off for `analyte`, or when `filt` is falsy, the
            filter is True everywhere (nothing is excluded).

        Raises
        ------
        KeyError, ValueError
            Re-raised after printing a hint when the filter key cannot
            be resolved or the analyte is missing from a `filt` dict.
        """
        if isinstance(filt, str):
            if filt in self.fnames:
                fkey = self.fuzzmatch(filt)
                if (analyte is not None
                        and not self.filter_table.loc[fkey, analyte]):
                    return np.ones(self.size, dtype=bool)
                # fkey is a column key, so it needs an explicit row slice.
                return self.filter_components.loc[:, fkey].values
            try:
                return self.make_fromkey(filt)
            except KeyError:
                print(("\n\n***Filter key invalid. Please consult "
                       "manual and try again."))
                raise

        if isinstance(filt, dict):
            try:
                return self.make_fromkey(filt[analyte])
            except (KeyError, ValueError):
                print(("\n\n***Filter key invalid. Please consult manual "
                       "and try again.\nOR\nAnalyte missing from filter "
                       "key dict."))
                raise

        if filt:
            return self.make_analyte(analyte)
        return np.ones(self.size, dtype=bool)

    def get_components(self, analyte):
        """
        No longer supported; always raises DeprecationWarning.
        """
        raise DeprecationWarning('This no longer works.')

    def get_info(self):
        """
        Get info for all filters.

        Returns
        -------
        str
            One ``'<set number>: <info>'`` line per entry in `info`,
            sorted by set number.
        """
        return ''.join(f'{k}: {self.info[k]}\n' for k in sorted(self.info))