GffAnnotationDb#

class GffAnnotationDb(*args, **kwargs: dict[str, Any])#

Support for annotations from gff files. Records that span multiple rows in the gff are merged into a single record.

Attributes:

db
describe: top level description of the annotation db
table_names

Methods

`add_feature`(*, seqid, biotype, name, spans)	adds a record to user table
`biotype_counts`()	return counts of biological types across all tables and seqids
`close`()	closes the db
`compatible`(other_db[, symmetric])	checks whether table_names are compatible
`count_distinct`(*[, seqid, biotype, name])	return table of counts of distinct values
`get_feature_children`(name[, biotype])	yields children of name
`get_feature_parent`(name, **kwargs)	yields parents of name
`get_records_matching`(*[, biotype, seqid, ...])	return all fields for matching records
`make_indexes`()	adds db indexes for core attributes
`num_matches`(*[, seqid, biotype, name, ...])	return the number of records matching condition
`subset`(*[, source, biotype, seqid, name, ...])	returns a new db instance with records matching the provided conditions
`to_rich_dict`()	returns a dict suitable for json serialisation
`union`(annot_db)	returns a new instance whose records are merged with those of annot_db
`update`(annot_db[, seqids])	update records with those from an instance of the same type
`update_record_spans`(*, name, spans)	updates spans attribute of a gff table record if present
`write`(path)	writes db as bytes to path

add_records
from_dict
get_features_matching
to_json

StrOrBool = str | bool#

add_feature(*, seqid, biotype, name, spans, ...)#

adds a record to user table

Parameters:

seqid: name of the sequence feature resides on
biotype: biological type of the record
name: the name of a record, an identifier
spans: the spans of the record; these will be sorted
strand: either ‘+’ or ‘-’. Defaults to ‘+’
attributes: additional attributes as a string
on_alignment: whether the annotation is an alignment annotation

add_records(reduced: dict, **kwargs: dict[str, Any]) → None#

biotype_counts() → dict#: return counts of biological types across all tables and seqids

close() → None#: closes the db

compatible(other_db: SupportsFeatures, symmetric=True) → bool#

checks whether table_names are compatible

Parameters:

other_db: the other annotation db instance
symmetric: checks only that the tables of other_db are equal to, or a subset of, mine

count_distinct(*, seqid: StrOrBool = False, biotype: StrOrBool = False, name: StrOrBool = False) → Table | None#

return table of counts of distinct values

Parameters:

seqid, biotype, name: if a string, selects the subset of rows matching the provided values and counts distinct values for the other fields whose value is True.

Returns:

Table with columns corresponding to the arguments whose value was True

Examples

To compute copy number by gene name within each genome

>>> counts_table = db.count_distinct(seqid=True, biotype="gene", name=True)

property db: Connection#

property describe: Table#: top level description of the annotation db

classmethod from_dict(data: dict) → AnnotationDbABC#

get_feature_children(name: str, biotype: str | None = None, **kwargs: dict[str, Any]) → Iterator[FeatureDataType]#: yields children of name

get_feature_parent(name: str, **kwargs: dict[str, Any]) → Iterator[FeatureDataType]#: yields parents of name

get_features_matching(*, biotype: str | None = None, seqid: str | None = None, name: str | None = None, start: int | None = None, stop: int | None = None, strand: str | None = None, attributes: str | None = None, on_alignment: bool | None = None, allow_partial: bool = False) → Iterator[FeatureDataType]#

get_records_matching(*, biotype: str | None = None, seqid: str | None = None, name: str | None = None, start: int | None = None, stop: int | None = None, strand: str | None = None, attributes: str | None = None, on_alignment: bool | None = None, allow_partial: bool = False) → Iterator[dict]#: return all fields for matching records

make_indexes() → None#: adds db indexes for core attributes

num_matches(*, seqid: str | None = None, biotype: str | None = None, name: str | None = None, strand: str | None = None, attributes: str | None = None, on_alignment: bool | None = None) → int#: return the number of records matching condition

subset(*, source: str | PathLike | PurePath | Path = ':memory:', biotype: str | None = None, seqid: str | None = None, name: str | None = None, start: int | None = None, stop: int | None = None, strand: str | None = None, attributes: str | None = None, allow_partial: bool = False) → Self#: returns a new db instance with records matching the provided conditions

property table_names: tuple[str]#

to_json() → str#

to_rich_dict() → dict#: returns a dict suitable for json serialisation

union(annot_db: SupportsFeatures) → SupportsFeatures#

returns a new instance whose records are merged with those of annot_db

Parameters:

annot_db: an annotation db whose schema is either a subset or a superset of the schema of self

Returns:

An instance of the class whose schema contains the other's

update(annot_db: SupportsFeatures, seqids: str | list[str] | None = None, **kwargs: dict[str, Any]) → None#: update records with those from an instance of the same type

update_record_spans(*, name: str, spans: list[list[int]]) → None#

updates spans attribute of a gff table record if present

Notes

Has no effect if name is not present.

write(path: str | PathLike | PurePath | Path) → None#: writes db as bytes to path