import json
import shutil
from os import path
from functools import reduce
from contextlib import contextmanager
import pygit2 as pg2
from .treewrapper import TreeWrapper
from .json_wrapper import JsonDictWrapper
from .search_functions import SearchFunction
# Public names exported by a star-import of this module.
__all__ = ['DEFAULT_TABLE', 'RESERVED_TABLE_NAMES', 'GitDB', 'Table']
# Name of the table that GitDB opens in its constructor and to which it
# delegates any attribute it does not define itself.
DEFAULT_TABLE = '__defaulttable__'
# Names that callers may not drop; GitDB.table() additionally refuses to
# create any of them except DEFAULT_TABLE. '__meta__' is the directory in
# which GitDB keeps its own metadata repository.
RESERVED_TABLE_NAMES = {'__meta__', DEFAULT_TABLE}
class GitDB:
    """The raw database class.

    This class constructs a database instance in the location described. This
    is automatically created under the covers by :py:class:`ogitm.Model`, but
    it can also be created and used outside the confines of Object-Model
    mappings.

    Any attribute looked up on GitDB that can't be found will be passed to
    the default :py:class:`~.gitdb.Table` instance, so this class could be
    used as a simple one-table document store without worrying about tables
    at all. This isn't recommended, however.

    Parameters:
        location (str): The path of the database
    """

    def __init__(self, location):
        self.location = location
        # Database-level metadata (currently the list of table names) lives
        # in its own bare repository next to the table directories.
        self.meta_location = path.join(location, '__meta__')
        self.meta_repo = pg2.init_repository(self.meta_location, bare=True)
        self.meta_tree = JsonDictWrapper(TreeWrapper(self.meta_repo))
        self.default_table = self.table(DEFAULT_TABLE)

    def table(self, table_name):
        """Create a new table, or open an existing one.

        You can also use the form ``gitdb['table name']``, which delegates to
        this method. If the table already exists, this method returns a new
        instance of Table pointing to the same table. (Note that two tables
        pointing to the same location always compare equal.)

        Parameters:
            table_name (str): The name this table will take

        Returns:
            Table: A table rooted at ``<location>/<table_name>``

        Raises:
            ValueError: if the name is a reserved table name
        """
        # The default table is reserved for callers of drop(), but it must
        # still be creatable here because __init__ opens it via this method.
        if table_name in RESERVED_TABLE_NAMES and table_name != DEFAULT_TABLE:
            raise ValueError("Table name " + table_name + " is reserved.")

        tables = self.meta_tree.get('table_list', [])
        if table_name not in tables:
            tables.append(table_name)
            # NOTE(review): nothing here calls meta_tree.save(); confirm that
            # the wrapper persists item assignment on its own.
            self.meta_tree['table_list'] = tables

        return Table(table_name, path.join(self.location, table_name))

    def __getitem__(self, table_name):
        """Shorthand for :py:meth:`~.GitDB.table`."""
        return self.table(table_name)

    def drop(self, table_name, force=False):
        """Completely and irrevocably destroy a table.

        Parameters:
            table_name (str): The name of the table to destroy
            force (bool): If true, no error will be raised if the table does
                not exist

        Raises:
            ValueError: if the table is reserved, if it does not exist and
                ``force`` is false, or if it could not be deleted for other
                reasons
        """
        if table_name in RESERVED_TABLE_NAMES:
            raise ValueError("Table name " + table_name + " is reserved.")

        tables = self.meta_tree.get('table_list', [])
        if table_name not in tables:
            # ``force`` only silences the "does not exist" error; it must not
            # short-circuit the deletion of a table that does exist.
            if force:
                return
            raise ValueError("Table name " + table_name + " does not exist.")

        try:
            shutil.rmtree(path.join(self.location, table_name))
        except OSError as oe:  # pragma: no cover
            msg = "Table name " + table_name + " could not be deleted"
            raise ValueError(msg) from oe

        # Unregister only after the files are gone, and write the list back
        # the same way table() does so the change reaches the metadata tree.
        tables.remove(table_name)
        self.meta_tree['table_list'] = tables

    def __getattr__(self, attr):
        """Delegate unknown attributes to the default table."""
        # __getattr__ only runs when normal lookup fails. If default_table
        # itself is missing (e.g. a half-initialised instance), looking it up
        # below would re-enter this method forever.
        if attr == 'default_table':
            raise AttributeError(attr)
        return getattr(self.default_table, attr)
class Table:
    """A class to represent an individual table in a database.

    This class should only really be created by a :py:class:`~.gitdb.GitDB`
    instance, although instantiating it manually won't actually change the
    way this class operates.

    Parameters:
        name (str): The name of the table
        location (str): The path of the table (Note that this is the path to
            this particular table's location, not the root path of the
            database.)
    """

    def __init__(self, name, location):
        self.name = name
        self.location = location

        # Documents and indexes live in the "data" repository ...
        self.dr_loc = path.join(location, 'data')
        self.data_repo = pg2.init_repository(self.dr_loc, bare=True)
        self.data_tree = JsonDictWrapper(TreeWrapper(self.data_repo))

        # ... while the id counter lives in a separate "meta" repository.
        self.mr_loc = path.join(location, 'meta')
        self.meta_repo = pg2.init_repository(self.mr_loc, bare=True)
        self.meta_tree = TreeWrapper(self.meta_repo)

        self._transaction_open = False
        self._context_managed = False

    def __eq__(self, other):
        """Two tables are equal when they point at the same location."""
        return isinstance(other, Table) and other.location == self.location

    def __hash__(self):
        # Defining __eq__ alone makes instances unhashable; hash on the same
        # field that equality compares.
        return hash(self.location)

    def _get_next_id(self):
        """Allocate, persist and return the next document id (first is 0)."""
        if 'meta-last_id' in self.meta_tree:
            new_meta = int(self.meta_tree['meta-last_id']) + 1
        else:
            new_meta = 0
        self.meta_tree['meta-last_id'] = str(new_meta)
        # Saved immediately and independently of the data tree.
        self.meta_tree.save()
        return new_meta

    def _reject_if_context_managed(self):
        """Refuse manual transaction control inside ``transaction()``."""
        if self._context_managed:
            m = "Cannot manually manage transaction inside context manager"
            raise ValueError(m)

    @property
    def transaction_open(self):
        """Returns whether there is currently a transaction open.

        Read-only
        """
        return self._transaction_open

    def begin_transaction(self):
        """Opens a new transaction.

        Raises:
            ValueError: if this method is called inside the
                :py:meth:`~.Table.transaction` context manager, or if a
                transaction is already open

        See Also:
            :py:meth:`~.Table.transaction`
                a context manager that automatically handles most of the
                details of a transaction
            :py:meth:`~.Table.commit` and :py:meth:`~.Table.rollback`
                methods for closing the transaction created here
        """
        self._reject_if_context_managed()
        if self._transaction_open:
            m = "Cannot begin transaction when there is an open transaction"
            raise ValueError(m)
        self._transaction_open = True

    def commit(self):
        """Commits all work performed during a transaction.

        Raises:
            ValueError: if this method is called inside the
                :py:meth:`~.Table.transaction` context manager, or if there
                is no open transaction when this method is called

        See Also:
            :py:meth:`~.Table.transaction`
                a context manager that automatically handles most of the
                details of a transaction
            :py:meth:`~.Table.begin_transaction`
                opens up a transaction
            :py:meth:`~.Table.rollback`
                rolls back instead of committing
        """
        self._reject_if_context_managed()
        if not self._transaction_open:
            m = "Cannot commit when there is not open transaction"
            raise ValueError(m)
        self._transaction_open = False
        self.save()

    def rollback(self):
        """Rolls back all work performed during a transaction.

        Raises:
            ValueError: if this method is called inside the
                :py:meth:`~.Table.transaction` context manager, or if there
                is no open transaction when this method is called.

        See Also:
            :py:meth:`~.Table.transaction`
                a context manager that automatically handles most of the
                details of a transaction
            :py:meth:`~.Table.begin_transaction`
                opens up a transaction
            :py:meth:`~.Table.commit`
                commits instead of rolling back
        """
        self._reject_if_context_managed()
        if not self._transaction_open:
            m = "Cannot rollback when there is not open transaction"
            raise ValueError(m)
        self._transaction_open = False
        self.data_tree.rollback()

    @contextmanager
    def transaction(self):
        """A context manager for transactions.

        Sometimes it's more convenient to use with-blocks for transactions.
        When entering the context, this calls
        :py:meth:`~.Table.begin_transaction`. When leaving the context due to
        normal execution, it commits all changes. When leaving the context
        due to an error or exception being raised, it rolls back all changes
        and passes the error on up.

        See Also:
            :py:meth:`~.Table.begin_transaction`,
            :py:meth:`~.Table.commit`,
            :py:meth:`~.Table.rollback`
                Methods for manually managing a transaction
        """
        self.begin_transaction()
        self._context_managed = True
        try:
            yield
        except BaseException:
            # The flag must be cleared first: rollback() refuses to run while
            # the table is marked as context-managed.
            self._context_managed = False
            self.rollback()
            raise
        else:
            self._context_managed = False
            self.commit()

    def revert_steps(self, steps, doc_id=None):
        """Reverts the database a number of steps.

        Parameters:
            steps (int): The number of steps to revert
            doc_id (int): If given, the name ``doc-<doc_id>`` is forwarded to
                the underlying tree as ``doc`` so that only that document is
                targeted; otherwise the whole table is reverted

        See Also:
            :py:meth:`~.Table.revert_to_state`
                Another way of reverting changes to the database
        """
        if doc_id is None:
            self.data_tree.revert_steps(steps)
        else:
            doc_name = 'doc-{id}'.format(id=doc_id)
            self.data_tree.revert_steps(steps, doc=doc_name)

    def revert_to_state(self, state, doc_id=None):
        """Reverts the whole database to a previously stored state.

        Parameters:
            state (oid): The state to return to
            doc_id (int): Not implemented yet; currently ignored

        See Also:
            :py:meth:`~.Table.revert_steps`
                Another way of reverting changes to the database
            :py:meth:`~.Table.save_state`
                A method that allows saving the state of the database
        """
        self.data_tree.revert_to_state(state)

    def save_state(self):
        """Returns a marker that can be used later to revert to this state.

        Because the database is built on top of git, all states are saved,
        and can be checked out. This method returns a marker to the
        particular commit that refers to the current database. Note that if
        the database is reverted to a position before this marker, the
        database can still be "for-verted" back to the marker position.

        Returns:
            A save state marker of arbitrary type

        See Also:
            :py:meth:`~.Table.revert_to_state`
                Reverts to states saved by this method
        """
        return self.data_tree.save_state()

    def _index_document(self, d_id, document):
        """Record ``d_id`` in the index of every key/value pair of a document."""
        for key, val in document.items():
            index_name = 'index-{key}'.format(key=key)
            index = self.data_tree.get(index_name, {})
            # Index entries are keyed by the JSON encoding of the value.
            index.setdefault(json.dumps(val), []).append(d_id)
            self.data_tree[index_name] = index

    def _unindex_document(self, d_id, document):
        """Remove ``d_id`` from the index of every key/value pair of a document."""
        for key, val in document.items():
            index_name = 'index-{key}'.format(key=key)
            index = self.data_tree.get(index_name, {})
            encoded = json.dumps(val)
            ids = index.get(encoded, [])
            if d_id not in ids:
                continue
            # Remove by value: list.pop() would treat the id as a position.
            ids.remove(d_id)
            if not ids:
                # Don't leave empty entries behind for values nobody has.
                del index[encoded]
            self.data_tree[index_name] = index

    def insert(self, document):
        """Inserts a document into this table.

        Documents are key-value python dicts. Nested documents are not
        currently tested, and neither are scalar documents; behaviour is
        undefined for both. The only allowed keys and values are the
        standard primitives (str, int, bool, float, etc), not other objects
        or collections.

        If a transaction is not open, this method will commit all changes
        into the database.

        Parameters:
            document (dict): A key-val single-level dictionary

        Returns:
            int: Document ID
        """
        d_id = self._get_next_id()
        self.data_tree['doc-{id}'.format(id=d_id)] = document
        self._index_document(d_id, document)

        if not self.transaction_open:
            self.save('insert doc-{id}'.format(id=d_id))
        return d_id

    def update(self, d_id, document):
        """Updates the document at `d_id` with a new document.

        This method replaces the document at d_id with a new document,
        completely deleting the old document to replace it with the new
        version. This is not very efficient.

        See the documentation for :py:meth:`~.Table.insert` for a discussion
        on what actually counts as a document.

        If a transaction is not open, this method will commit all changes
        into the database.

        Parameters:
            d_id (int): A previously-saved document id
            document (dict): The document to replace with

        Returns:
            int: Document ID

        Raises:
            ValueError: if the document id does not exist
        """
        doc_name = 'doc-{id}'.format(id=d_id)
        if doc_name not in self.data_tree:
            raise ValueError("Cannot update document that doesn't exist")

        old_doc = self.data_tree[doc_name]
        self.data_tree[doc_name] = document
        # Re-index from scratch: drop every old entry, then add the new ones.
        self._unindex_document(d_id, old_doc)
        self._index_document(d_id, document)

        if not self.transaction_open:
            self.save('update ' + doc_name)
        return d_id

    def save(self, msg=''):
        """Commits all current unsaved changes.

        Normally, this will be automatically called by any methods that
        make changes, or by the transaction methods. This shouldn't be called
        otherwise, unless in exceptional circumstances (in which case, file
        an issue because something's probably gone wrong.)

        Parameters:
            msg (str): This will become git's commit message
        """
        return self.data_tree.save(msg)

    def get(self, doc_id):
        """Gets a document given its document id.

        This is the simplest but least useful way of getting information out
        of the database. It returns the document.

        Parameters:
            doc_id (int): The document ID to fetch

        Returns:
            dict: The document

        Raises:
            TypeError: if ``doc_id`` is not an integer
            ValueError: if no document is stored under ``doc_id``
        """
        if not isinstance(doc_id, int):
            raise TypeError("id must be an integer")

        doc = self.data_tree.get('doc-{id}'.format(id=doc_id))
        if doc is None:
            err = "No such document under id {id}".format(id=doc_id)
            raise ValueError(err)
        return doc

    def find_ids(self, where):
        """Find the ids that match a given query.

        This method is the same as :py:meth:`~.Table.find`, but returns the
        ids rather than (id, doc) pairs.

        Parameters:
            where (dict): Search definition (see :py:meth:`~.Table.find`)

        Returns:
            list[int]: A list of matching document ids
        """
        return [d_id for d_id, _ in self.find(where)]

    def find_items(self, where):
        """Find the documents that match a given query.

        This method is the same as :py:meth:`~.Table.find`, but returns the
        documents rather than (id, doc) pairs.

        Parameters:
            where (dict): Search definition (see :py:meth:`~.Table.find`)

        Returns:
            list[dict]: A list of matching documents
        """
        return [doc for _, doc in self.find(where)]

    def find(self, where):
        """Finds the documents that match a given query.

        For details on searching, see :doc:`/search_queries`. Searches in the
        raw :py:class:`~.GitDB` should be documents, rather than keyword
        arguments, but otherwise searches are the same.

        This method returns (id, document) pairs, ordered by id. There are
        also the convenience methods :py:meth:`~.Table.find_ids` and
        :py:meth:`~.Table.find_items`, which just return the ids and
        documents respectively.

        Parameters:
            where (dict): Search definition

        Returns:
            list[(int, dict)]: A list of matching documents
        """
        # Document entries are named "doc-<id>"; strip the 4-char prefix.
        all_ids = {int(name[4:]) for name in self.data_tree.items_list()
                   if name.startswith('doc-')}

        # Every search term contributes one candidate set; a document matches
        # only if it appears in all of them.
        id_sets = [all_ids]
        for key, term in where.items():
            index = self.data_tree.get('index-{key}'.format(key=key), {})
            if isinstance(term, dict):
                id_sets.append(self._find_complex(key, term, index, all_ids))
            else:  # simple term, i.e. name="bob"
                id_sets.append(self._find_simple(key, term, index))

        doc_ids = reduce(lambda x, y: x & y, id_sets)
        # Sorted so that results (and find_one) do not depend on set order.
        return [(i, self.data_tree['doc-{id}'.format(id=i)])
                for i in sorted(doc_ids)]

    def find_one(self, where):
        """Finds one document.

        This method functions the same as :py:meth:`~.Table.find`, but
        returns just one element, or None if no element found.

        Parameters:
            where (dict): Search definition (see :py:meth:`~.Table.find`)

        Returns:
            *(int, document)* or *None*
        """
        res = self.find(where)
        return res[0] if res else None

    def _find_simple(self, key, val, index):
        """Return the ids whose indexed value equals ``val`` exactly."""
        return set(index.get(json.dumps(val), []))

    def _find_complex(self, key, query, index, al):
        """Return the ids that satisfy every operator in ``query``.

        ``query`` maps operator names to arguments; each operator is resolved
        through ``SearchFunction.get`` and the per-operator results are
        intersected. ``al`` is the set of all document ids in the table.
        """
        inc_sets = []
        for operator, arg in query.items():
            func = SearchFunction.get(operator)
            inc_sets.append(func(key, operator, arg, index, query, al))
        if not inc_sets:
            # No operators means no restriction; reduce() would raise on an
            # empty sequence.
            return set(al)
        return reduce(lambda x, y: x & y, inc_sets)