"""
An object used for storing, manipulating and modifying data filters.
"""
import re
import numpy as np
from difflib import SequenceMatcher as seqm
from latools.helpers.helpers import bool_2_indices
[docs]class filt(object):
"""
Container for creating, storing and selecting data filters.
Parameters
----------
size : int
The length that the filters need to be (should be
the same as your data).
analytes : array_like
A list of the analytes measured in your data.
Attributes
----------
size : int
The length that the filters need to be (should be
the same as your data).
analytes : array_like
A list of the analytes measured in your data.
components : dict
A dict containing each individual filter that has been
created.
info : dict
A dict containing descriptive information about each
filter in `components`.
params : dict
A dict containing the parameters used to create
each filter, which can be passed directly to the
corresponding filter function to recreate the filter.
switches : dict
A dict of boolean switches specifying which filters
are active for each analyte.
keys : dict
A dict of logical strings specifying which filters are
applied to each analyte.
sequence : dict
A numbered dict specifying what order the filters were
applied in (for some filters, order matters).
n : int
The number of filters applied to the data.
"""
def __init__(self, size, analytes):
self.size = size
self.analytes = analytes
self.index = {}
self.sets = {}
self.maxset = -1
self.components = {}
self.info = {}
self.params = {}
self.keys = {}
self.n = 0
self.switches = {}
self.sequence = {}
for a in self.analytes:
self.switches[a] = {}
def __repr__(self):
apad = max([len(a) for a in self.analytes] + [7])
astr = '{:' + '{:.0f}'.format(apad) + 's}'
leftpad = max([len(s) for s
in self.components.keys()] + [11]) + 2
out = '{string:{number}s}'.format(string='n', number=3)
out += '{string:{number}s}'.format(string='Filter Name', number=leftpad)
for a in self.analytes:
out += astr.format(a)
out += '\n'
reg = re.compile('[0-9]+_(.*)')
for n, t in self.index.items():
out += '{string:{number}s}'.format(string=str(n), number=3)
tn = reg.match(t).groups()[0]
out += '{string:{number}s}'.format(string=str(tn), number=leftpad)
for a in self.analytes:
out += astr.format(str(self.switches[a][t]))
out += '\n'
return(out)
[docs] def add(self, name, filt, info='', params=(), setn=None):
"""
Add filter.
Parameters
----------
name : str
filter name
filt : array_like
boolean filter array
info : str
informative description of the filter
params : tuple
parameters used to make the filter
Returns
-------
None
"""
iname = '{:.0f}_'.format(self.n) + name
self.index[self.n] = iname
if setn is None:
setn = self.maxset + 1
self.maxset = setn
if setn not in self.sets.keys():
self.sets[setn] = [iname]
else:
self.sets[setn].append(iname)
# self.keys is not added to?
self.components[iname] = filt
self.info[iname] = info
self.params[iname] = params
for a in self.analytes:
self.switches[a][iname] = False
self.n += 1
return
[docs] def remove(self, name=None, setn=None):
"""
Remove filter.
Parameters
----------
name : str
name of the filter to remove
setn : int or True
int: number of set to remove
True: remove all filters in set that 'name' belongs to
Returns
-------
None
"""
if isinstance(name, int):
name = self.index[name]
if setn is not None:
name = self.sets[setn]
del self.sets[setn]
elif isinstance(name, (int, str)):
name = [name]
if setn is True:
for n in name:
for k, v in self.sets.items():
if n in v:
name.append([m for m in v if m != n])
for n in name:
for k, v in self.sets.items():
if n in v:
self.sets[k] = [m for m in v if n != m]
del self.components[n]
del self.info[n]
del self.params[n]
del self.keys[n]
for a in self.analytes:
del self.switches[a][n]
return
[docs] def clear(self):
"""
Clear all filters.
"""
self.components = {}
self.info = {}
self.params = {}
self.switches = {}
self.keys = {}
self.index = {}
self.sets = {}
self.maxset = -1
self.n = 0
for a in self.analytes:
self.switches[a] = {}
return
[docs] def clean(self):
"""
Remove unused filters.
"""
for f in sorted(self.components.keys()):
unused = not any(self.switches[a][f] for a in self.analytes)
if unused:
self.remove(f)
[docs] def on(self, analyte=None, filt=None):
"""
Turn on specified filter(s) for specified analyte(s).
Parameters
----------
analyte : optional, str or array_like
Name or list of names of analytes.
Defaults to all analytes.
filt : optional. int, str or array_like
Name/number or iterable names/numbers of filters.
Returns
-------
None
"""
if isinstance(analyte, str):
analyte = [analyte]
if isinstance(filt, (int, float)):
filt = [filt]
elif isinstance(filt, str):
filt = self.fuzzmatch(filt, multi=True)
if analyte is None:
analyte = self.analytes
if filt is None:
filt = list(self.index.values())
for a in analyte:
for f in filt:
if isinstance(f, (int, float)):
f = self.index[int(f)]
try:
self.switches[a][f] = True
except KeyError:
f = self.fuzzmatch(f, multi=False)
self.switches[a][f] = True
# for k in self.switches[a].keys():
# if f in k:
# self.switches[a][k] = True
return
[docs] def off(self, analyte=None, filt=None):
"""
Turn off specified filter(s) for specified analyte(s).
Parameters
----------
analyte : optional, str or array_like
Name or list of names of analytes.
Defaults to all analytes.
filt : optional. int, list of int or str
Number(s) or partial string that corresponds to filter name(s).
Returns
-------
None
"""
if isinstance(analyte, str):
analyte = [analyte]
if isinstance(filt, (int, float)):
filt = [filt]
elif isinstance(filt, str):
filt = self.fuzzmatch(filt, multi=True)
if analyte is None:
analyte = self.analytes
if filt is None:
filt = list(self.index.values())
for a in analyte:
for f in filt:
if isinstance(f, int):
f = self.index[f]
try:
self.switches[a][f] = False
except KeyError:
f = self.fuzzmatch(f, multi=False)
self.switches[a][f] = False
# for k in self.switches[a].keys():
# if f in k:
# self.switches[a][k] = False
return
[docs] def make(self, analyte):
"""
Make filter for specified analyte(s).
Filter specified in filt.switches.
Parameters
----------
analyte : str or array_like
Name or list of names of analytes.
Returns
-------
array_like
boolean filter
"""
if analyte is None:
analyte = self.analytes
elif isinstance(analyte, str):
analyte = [analyte]
out = []
for f in self.components.keys():
for a in analyte:
if self.switches[a][f]:
out.append(f)
key = ' & '.join(sorted(out))
for a in analyte:
self.keys[a] = key
return self.make_fromkey(key)
[docs] def fuzzmatch(self, fuzzkey, multi=False):
"""
Identify a filter by fuzzy string matching.
Partial ('fuzzy') matching performed by `fuzzywuzzy.fuzzy.ratio`
Parameters
----------
fuzzkey : str
A string that partially matches one filter name more than the others.
Returns
-------
The name of the most closely matched filter. : str
"""
keys, ratios = np.array([(f, seqm(None, fuzzkey, f).ratio()) for f in self.components.keys()]).T
mratio = max(ratios)
if multi:
return keys[ratios == mratio]
else:
if sum(ratios == mratio) == 1:
return keys[ratios == mratio][0]
else:
raise ValueError("\nThe filter key provided ('{:}') matches two or more filter names equally well:\n".format(fuzzkey) + ', '.join(keys[ratios == mratio]) + "\nPlease be more specific!")
[docs] def make_fromkey(self, key):
"""
Make filter from logical expression.
Takes a logical expression as an input, and returns a filter. Used for advanced
filtering, where combinations of nested and/or filters are desired. Filter names must
exactly match the names listed by print(filt).
Example: ``key = '(Filter_1 | Filter_2) & Filter_3'``
is equivalent to:
``(Filter_1 OR Filter_2) AND Filter_3``
statements in parentheses are evaluated first.
Parameters
----------
key : str
logical expression describing filter construction.
Returns
-------
array_like
boolean filter
"""
if key != '':
def make_runable(match):
return "self.components['" + self.fuzzmatch(match.group(0)) + "']"
runable = re.sub('[^\(\)|& ]+', make_runable, key)
return eval(runable)
else:
return ~np.zeros(self.size, dtype=bool)
[docs] def make_keydict(self, analyte=None):
"""
Make logical expressions describing the filter(s) for specified analyte(s).
Parameters
----------
analyte : optional, str or array_like
Name or list of names of analytes.
Defaults to all analytes.
Returns
-------
dict
containing the logical filter expression for each analyte.
"""
if analyte is None:
analyte = self.analytes
elif isinstance(analyte, str):
analyte = [analyte]
out = {}
for a in analyte:
key = []
for f in self.components.keys():
if self.switches[a][f]:
key.append(f)
out[a] = ' & '.join(sorted(key))
self.keydict = out
return out
[docs] def grab_filt(self, filt, analyte=None):
"""
Flexible access to specific filter using any key format.
Parameters
----------
f : str, dict or bool
either logical filter expression, dict of expressions,
or a boolean
analyte : str
name of analyte the filter is for.
Returns
-------
array_like
boolean filter
"""
if isinstance(filt, str):
if filt in self.components:
if analyte is None:
return self.components[filt]
else:
if self.switches[analyte][filt]:
return self.components[filt]
else:
try:
ind = self.make_fromkey(filt)
except KeyError:
print(("\n\n***Filter key invalid. Please consult "
"manual and try again."))
elif isinstance(filt, dict):
try:
ind = self.make_fromkey(filt[analyte])
except ValueError:
print(("\n\n***Filter key invalid. Please consult manual "
"and try again.\nOR\nAnalyte missing from filter "
"key dict."))
elif filt:
ind = self.make(analyte)
else:
ind = ~np.zeros(self.size, dtype=bool)
return ind
[docs] def get_components(self, key, analyte=None):
"""
Extract filter components for specific analyte(s).
Parameters
----------
key : str
string present in one or more filter names.
e.g. 'Al27' will return all filters with
'Al27' in their names.
analyte : str
name of analyte the filter is for
Returns
-------
boolean filter : array-like
"""
out = {}
for k, v in self.components.items():
if key in k:
if analyte is None:
out[k] = v
elif self.switches[analyte][k]:
out[k] = v
return out
[docs] def get_info(self):
"""
Get info for all filters.
"""
out = ''
for k in sorted(self.components.keys()):
out += '{:s}: {:s}'.format(k, self.info[k]) + '\n'
return(out)
## TODO: [Low Priority] Re-write filt object to use pandas?
# class filt(object):
# def __init__(self, size, analytes):
# self.size = size
# self.analytes = analytes
# self.filter_table = pd.DataFrame(index=pd.MultiIndex(levels=[[], []], labels=[[], []], names=['N', 'desc']),
# columns=self.analytes)
# self.filters = Bunch()
# self.param = Bunch()
# self.info = Bunch()
# self.N = 0
# def __repr__(self):
# pass
# def add(self, name, filt, info='', params=()):
# self.filters[self.N] = filt
# self.param[self.N] = params
# self.info[self.N] = info
# self.filter_table.loc[(self.N, name), :] = False
# self.N += 1
# def remove(self):
# pass
# def clear(self):
# self.__init__(self.size, self.analytes)
# def clean(self):
# pass
# def on(self):
# pass
# def off(self):
# pass
# def make(self):
# pass
# def fuzzmatch(self):
# pass
# def make_fromkey(self):
# pass
# def make_keydict(self):
# pass
# def grab_filt(self):
# pass
# def get_components(self):
# pass
# def get_info(self):
# pass