import abc
import logging
import circleparse
import numpy as np
from circleguard.enums import RatelimitWeight
from circleguard.mod import Mod
from circleguard.utils import TRACE
from circleguard.loader import Loader
from circleguard.span import Span
class Loadable(abc.ABC):
    """
    Abstract base for anything that stands in for one or more replays whose
    replay data has to be fetched from somewhere else first - the osu! api, a
    local cache, or some other location.

    Parameters
    ----------
    cache: bool
        Whether to cache the replay data once loaded.

    Attributes
    ----------
    loaded: bool
        ``False`` on construction. Subclasses are expected to flip this to
        ``True`` once :meth:`~Loadable.load` has finished.
    """

    def __init__(self, cache):
        self.cache = cache
        self.loaded = False

    @abc.abstractmethod
    def load(self, loader, cache):
        """
        Fetches whatever this loadable needs in order to count as fully
        loaded. How that happens is up to the subclass.

        Parameters
        ----------
        loader: :class:`~circleguard.loader.Loader`
            The loader to load this loadable with. A loader is always passed,
            even to subclasses which never touch the osu api and so have no
            use for it.
        cache: bool
            The cache preference handed down by a parent - either a
            :class:`~.LoadableContainer` or
            :class:`~circleguard.circleguard.Circleguard` itself. A ``cache``
            value set on the loadable takes precedence over this argument;
            the argument is only respected when the loadable has no
            preference of its own.
        """

    @abc.abstractmethod
    def __eq__(self, loadable):
        """
        Compares this loadable against ``loadable``. Each subclass decides
        what equality means for it.
        """
class LoadableContainer(Loadable):
    r"""
    A loadable which contains other loadables. This means that it has three
    stages - unloaded, info loaded, and loaded.

    When info loaded, the :class:`~LoadableContainer` has :class:`~Loadable`\s
    but they are unloaded.

    When loaded, the :class:`~LoadableContainer` has loaded :class:`Loadable`\s.

    Parameters
    ----------
    cache: bool
        Whether to cache the contained loadables once they are loaded.
        ``None`` means "no preference" - the value passed to
        :meth:`~LoadableContainer.load` is cascaded to the children instead.
    """

    def __init__(self, cache):
        super().__init__(cache)
        self.info_loaded = False

    def load(self, loader, cache=None):
        r"""
        Loads all :class:`~circleguard.loadable.Loadable`\s contained by this
        loadable container.

        Parameters
        ----------
        loader: :class:`~circleguard.loader.Loader`
            The loader to load the :class:`~circleguard.loadable.Loadable`\s
            with.
        cache: bool
            The cache preference handed down by a parent. It is only cascaded
            to the contained loadables when this container's own ``cache`` is
            unset (``None``); otherwise the container's value is cascaded.

        Notes
        -----
        If this container is already loaded, this method has no effect.
        """
        if self.loaded:
            return
        # our own preference wins; only defer to the parent's when we have none
        cascade_cache = cache if self.cache is None else self.cache
        # children only exist once the container is info loaded
        self.load_info(loader)
        for loadable in self.all_loadables():
            loadable.load(loader, cascade_cache)
        self.loaded = True

    # TODO in core 5.0.0: don't provide a default implementation of this method.
    # We currently assume that users will only use LoadableContainer for two
    # things:
    # * needs to define ``Replay`` instances on load info and does not hold any
    #   ``LoadableContainer``s. In which case you should use ReplayContainer,
    #   which has ``load_info`` as abstract
    # * does not need to define any ``Replay`` instances on load info, and holds
    #   ``LoadableContainer``s. In which case you should use ``Check`` or
    #   subclass LoadableContainer
    # But there is a third option - needing to define ``Replay`` instances on
    # load info, *and* holding ``LoadableContainer``s. In which case this
    # default does not do what we want. It's not worth it to define this method
    # here, so move this implementation to ``Check`` (or maybe even split into
    # a further two subclasses, "does not create anything new on load info"
    # which Check would inherit from, and "does create new Loadables on load
    # info", which is the third case here).
    def load_info(self, loader):
        """
        Info loads every nested :class:`~LoadableContainer` held by this
        container, then marks this container as info loaded.

        Parameters
        ----------
        loader: :class:`~circleguard.loader.Loader`
            The loader to pass to each nested container's ``load_info``.

        Notes
        -----
        If this container is already info loaded, this method has no effect.
        """
        if self.info_loaded:
            return
        for loadable in self.all_loadables():
            # plain replays have no info stage; only containers do
            if isinstance(loadable, LoadableContainer):
                loadable.load_info(loader)
        self.info_loaded = True

    @abc.abstractmethod
    def all_loadables(self):
        r"""
        Returns the :class:`~Loadable`\s directly held by this container.
        """

    @abc.abstractmethod
    def all_replays(self):
        r"""
        Returns all the :class:`~.Replay`\s in this loadable container.

        Warnings
        --------
        If you want an accurate list of :class:`~.Replay`\s in this instance,
        you must call :func:`~circleguard.circleguard.Circleguard.load` on this
        instance before :func:`~LoadableContainer.all_replays`. Otherwise,
        this instance is not info loaded, and does not have a complete list of
        replays it represents.
        """

    def __getitem__(self, key):
        # indexing with an int or a slice is forwarded to the replay sequence,
        # which handles both cases itself
        return self.all_replays()[key]

    def __iter__(self):
        return iter(self.all_replays())
class ReplayContainer(LoadableContainer):
    """
    A LoadableContainer that only holds Replays and subclasses thereof.

    ReplayContainers start out unloaded. Calling
    :meth:`~LoadableContainer.load_info` makes them info loaded, and calling
    :meth:`~Loadable.load` makes them fully loaded. Calling
    :meth:`~Loadable.load` on an unloaded container loads the info first and
    the replays afterwards.

    * Unloaded: the container has no actual Replay objects. It may have
      limited knowledge about their number or type.
    * Info loaded: the container has references to Replay objects, but those
      Replay objects are unloaded.
    * Loaded: the Replay objects are loaded.
    """

    def __init__(self, cache):
        super().__init__(cache)

    # Deliberately re-declared as abstract. The implementation inherited from
    # LoadableContainer is a sensible default for other user-defined loadable
    # containers, but not for ReplayContainers and user-defined subclasses
    # thereof.
    @abc.abstractmethod
    def load_info(self, loader):
        """
        Brings this container into the info loaded state. Left to the
        subclass to implement.
        """

    def all_loadables(self):
        # A ReplayContainer holds nothing but replays, so its loadables and
        # its replays are one and the same.
        return self.all_replays()
class Check(LoadableContainer):
    r"""
    Organizes :class:`~.Loadable`\s and what to investigate them for.

    Parameters
    ----------
    loadables: :class:`~.Loadable` or list[:class:`~.Loadable`]
        The loadables to hold for investigation. A single loadable is wrapped
        into a one-element list.
    cache: bool
        Whether to cache the loadables once they are loaded. This will be
        overridden by a ``cache`` option set by a :class:`~Loadable` in
        ``loadables``. It only affects children loadables when they do not have
        a ``cache`` option set.
    loadables2: :class:`~.Loadable` or list[:class:`~.Loadable`]
        A second set of loadables to hold. Useful for partitioning loadables
        for a replay stealing investigation. Defaults to no loadables.
    """

    def __init__(self, loadables, cache, loadables2=None):
        super().__init__(cache)
        self.log = logging.getLogger(__name__ + ".Check")

        if isinstance(loadables, Loadable):
            loadables = [loadables]
        self.loadables1 = loadables

        if loadables2 is None:
            loadables2 = []
        elif isinstance(loadables2, Loadable):
            loadables2 = [loadables2]
        self.loadables2 = loadables2

    @staticmethod
    def _flatten_replays(loadables):
        r"""
        Expands ``loadables`` into a flat list of replays: containers
        contribute their ``all_replays()``, anything else is taken to be a
        replay itself.
        """
        replays = []
        for loadable in loadables:
            if isinstance(loadable, LoadableContainer):
                replays += loadable.all_replays()
            else:
                replays.append(loadable)  # loadable is a Replay
        return replays

    def all_loadables(self):
        r"""
        Returns all the :class:`~circleguard.loadable.Loadable`\s contained by
        this check.

        Returns
        -------
        list[:class:`~Loadable`]
            All the loadables in this check - ``loadables1`` followed by
            ``loadables2``.

        See Also
        --------
        :func:`~Check.all_replays`.

        Notes
        -----
        :class:`~circleguard.loadable.Loadable`\s are very different from
        :class:`~circleguard.loadable.Replay`\s -
        ``len(check.all_loadables())`` will *not* return the number of
        replays in the check, for instance.
        """
        # unpack rather than ``+`` so that a tuple (or other non-list
        # collection) passed to the constructor doesn't raise a TypeError
        return [*self.loadables1, *self.loadables2]

    def all_replays(self):
        r"""
        Returns all the :class:`~.Replay`\s in this check. Contrast with
        :func:`~Check.all_loadables`, which returns all the
        :class:`~.Loadable`\s in this check.

        Returns
        -------
        list[:class:`~Replay`]
            All the replays in this check.
        """
        return self.all_replays1() + self.all_replays2()

    def all_replays1(self):
        r"""
        Returns all the :class:`~.Replay`\s contained by ``loadables1`` of this
        check.

        Returns
        -------
        list[:class:`~Replay`]
            All the replays contained by ``loadables1`` of this check.
        """
        return self._flatten_replays(self.loadables1)

    def all_replays2(self):
        r"""
        Returns all the :class:`~.Replay`\s contained by ``loadables2`` of this
        check.

        Returns
        -------
        list[:class:`~Replay`]
            All the replays contained by ``loadables2`` of this check.
        """
        return self._flatten_replays(self.loadables2)

    def __eq__(self, loadable):
        # two checks are equal when they expand to the same replays, however
        # those replays happen to be grouped into loadables
        if not isinstance(loadable, Check):
            return False
        return self.all_replays() == loadable.all_replays()

    def __repr__(self):
        return (f"Check(loadables={self.loadables1},loadables2={self.loadables2},"
                f"loaded={self.loaded})")
class Map(ReplayContainer):
    """
    The top plays on a map (its leaderboard), as seen on the website.

    Parameters
    ----------
    map_id: int
        The map to represent the top plays for.
    span: str or Span
        A comma separated list of ranges of top plays to retrieve.
        ``span="1-3,6,2-4"`` -> replays in the range ``[1,2,3,4,6]``.
    mods: :class:`~.enums.ModCombination`
        If passed, only represent replays played with this exact mod
        combination. Due to limitations with the api, fuzzy matching is not
        implemented. <br>
        This is applied before ``span``. That is, if ``span="1-2"``
        and ``mods=Mod.HD``, the top two ``HD`` plays on the map are
        represented.
    cache: bool
        Whether to cache the replays once they are loaded.
    """

    def __init__(self, map_id, span, mods=None, cache=None):
        super().__init__(cache)
        self.map_id = map_id
        self.mods = mods
        self.span = Span(span)
        # filled in by ``load_info``
        self.replays = []

    def load_info(self, loader):
        """
        Asks ``loader`` for the replay infos matching this map, span and mods,
        and creates one (unloaded) :class:`~.ReplayMap` per info.

        Has no effect if this map is already info loaded.
        """
        if self.info_loaded:
            return
        replay_infos = loader.replay_info(self.map_id, self.span, mods=self.mods)
        for replay_info in replay_infos:
            replay = ReplayMap(replay_info.map_id, replay_info.user_id, replay_info.mods,
                               cache=self.cache, info=replay_info)
            self.replays.append(replay)
        self.info_loaded = True

    def all_replays(self):
        """
        Returns the replays created by ``load_info`` (an empty list before
        this map has been info loaded).
        """
        return self.replays

    def __eq__(self, loadable):
        # equality is on what the map represents (map, mods, span), not on
        # cache settings or load state
        return (isinstance(loadable, Map)
                and self.map_id == loadable.map_id
                and self.mods == loadable.mods
                and self.span == loadable.span)

    def __repr__(self):
        return (f"Map(map_id={self.map_id},cache={self.cache},mods={self.mods},"
                f"span={self.span},replays={self.replays},loaded={self.loaded})")

    def __str__(self):
        return f"Map {self.map_id}"
class User(ReplayContainer):
    """
    A user's top plays (pp-wise, as seen on the website).

    Parameters
    ----------
    user_id: int
        The user to represent the top plays for.
    span: str or Span
        A comma separated list of ranges of top plays to retrieve.
        ``span="1-3,6,2-4"`` -> replays in the range ``[1,2,3,4,6]``.
    mods: :class:`~.enums.ModCombination`
        If passed, only represent replays played with this exact mod
        combination. Due to limitations with the api, fuzzy matching is not
        implemented. <br>
        This is applied before ``span``. That is, if ``span="1-2"``
        and ``mods=Mod.HD``, the user's top two ``HD`` plays are represented.
    cache: bool
        Whether to cache the replays once they are loaded.
    available_only: bool
        Whether to represent only replays that have replay data available.
        Replays are filtered on this basis after ``mods`` and ``span``
        are applied. True by default.
    """

    def __init__(self, user_id, span, mods=None, cache=None, available_only=True):
        super().__init__(cache)
        self.replays = []
        self.user_id = user_id
        self.span = Span(span)
        self.mods = mods
        self.available_only = available_only

    def load_info(self, loader):
        """
        Asks ``loader`` for this user's best plays matching the span and mods,
        and creates one (unloaded) :class:`~.ReplayMap` per play. Plays
        without available replay data are skipped when ``available_only`` is
        set.

        Has no effect if this user is already info loaded.
        """
        if self.info_loaded:
            return
        for info in loader.get_user_best(self.user_id, span=self.span, mods=self.mods):
            if self.available_only and not info.replay_available:
                continue
            self.replays.append(ReplayMap(info.map_id, info.user_id, info.mods, cache=self.cache, info=info))
        self.info_loaded = True

    def all_replays(self):
        """
        Returns the replays created by ``load_info`` (an empty list before
        this user has been info loaded).
        """
        return self.replays

    def __eq__(self, loadable):
        # equality is on what the user represents (user, mods, span), not on
        # cache settings or load state
        if not isinstance(loadable, User):
            return False
        return (self.user_id == loadable.user_id and self.mods == loadable.mods
                and self.span == loadable.span)

    # Without these, a User nested in e.g. a Check's repr shows up as an
    # opaque default object repr, unlike Map.
    def __repr__(self):
        return (f"User(user_id={self.user_id},cache={self.cache},mods={self.mods},"
                f"span={self.span},available_only={self.available_only},"
                f"replays={self.replays},loaded={self.loaded})")

    def __str__(self):
        return f"User {self.user_id}"
class MapUser(ReplayContainer):
    """
    All replays on a map by a user, not just the top replay.

    Parameters
    ----------
    map_id: int
        The map to represent scores by ``user_id`` on.
    user_id: int
        The user to represent scores on ``map_id`` for.
    span: str or Span
        A comma separated list of ranges of plays to retrieve.
        ``span="1-3,6,2-4"`` -> replays in the range ``[1,2,3,4,6]``.
        Defaults to ``Loader.MAX_MAP_SPAN``.
    cache: bool
        Whether to cache the replays once they are loaded.
    available_only: bool
        Whether to represent only replays that have replay data available.
        Replays are filtered on this basis after ``span`` is applied.
        True by default.
    """

    def __init__(self, map_id, user_id, span=Loader.MAX_MAP_SPAN, cache=None, available_only=True):
        super().__init__(cache)
        self.replays = []
        self.map_id = map_id
        self.user_id = user_id
        self.span = Span(span)
        self.available_only = available_only

    def load_info(self, loader):
        """
        Asks ``loader`` for the replay infos of this user on this map within
        the span, and creates one (unloaded) :class:`~.ReplayMap` per info.
        Infos without available replay data are skipped when
        ``available_only`` is set.

        Has no effect if this map user is already info loaded.
        """
        if self.info_loaded:
            return
        for info in loader.replay_info(self.map_id, span=self.span, user_id=self.user_id, limit=False):
            if self.available_only and not info.replay_available:
                continue
            self.replays.append(ReplayMap(info.map_id, info.user_id, info.mods, cache=self.cache, info=info))
        self.info_loaded = True

    def all_replays(self):
        """
        Returns the replays created by ``load_info`` (an empty list before
        this map user has been info loaded).
        """
        return self.replays

    def __eq__(self, loadable):
        # equality is on what is represented (map, user, span), not on cache
        # settings or load state
        if not isinstance(loadable, MapUser):
            return False
        return (self.map_id == loadable.map_id and self.user_id == loadable.user_id
                and self.span == loadable.span)

    # Without these, a MapUser nested in e.g. a Check's repr shows up as an
    # opaque default object repr, unlike Map.
    def __repr__(self):
        return (f"MapUser(map_id={self.map_id},user_id={self.user_id},cache={self.cache},"
                f"span={self.span},available_only={self.available_only},"
                f"replays={self.replays},loaded={self.loaded})")

    def __str__(self):
        return f"MapUser {self.user_id} on {self.map_id}"
class Replay(Loadable):
    """
    A replay played by a player.

    Parameters
    ----------
    weight: :class:`~.enums.RatelimitWeight`
        How much it 'costs' to load this replay from the api.
    cache: bool
        Whether to cache this replay once it is loaded.

    Attributes
    ----------
    timestamp: :class:`datetime.datetime`
        When this replay was played.
    map_id: int
        The id of the map the replay was played on, or 0 if
        unknown or on an unsubmitted map.
    user_id: int
        The id of the player who played the replay, or 0 if unknown
        (if the player is restricted, for instance). Note that if the
        user id is known, even if the user is restricted, it should still be
        given instead of 0.
    username: str
        The username of the player who played the replay.
    mods: :class:`~.enums.ModCombination`
        The mods the replay was played with.
    replay_id: int
        The id of the replay, or 0 if the replay is unsubmitted.
    t: ndarray[int]
        A 1d array containing the timestamp for each frame. <br>
        This is only nonnull after the replay has been loaded.
    xy: ndarray[float]
        A 2d, two column array, containing the ``x`` and ``y`` coordinates of
        each frame in the first and second column respectively. <br>
        This is only nonnull after the replay has been loaded.
    k: ndarray[int]
        A 1d array containing the keys pressed for each frame. <br>
        This is only nonnull after the replay has been loaded.
    """

    def __init__(self, weight, cache):
        super().__init__(cache)
        self.weight = weight

        # remains ``None`` until replay is loaded
        self.timestamp = None
        self.map_id = None
        self.username = None
        self.user_id = None
        self.mods = None
        self.replay_id = None
        self.replay_data = None

        # remains ``None`` when replay is unloaded or loaded but with no data
        self.t = None
        self.xy = None
        self.k = None

    def _process_replay_data(self, replay_data):
        """
        Preprocesses the replay data (turns it into numpy arrays) for fast
        manipulation when investigating.

        Sets ``self.replay_data`` to ``replay_data`` as given and, unless it
        is ``None``, sets ``self.t``, ``self.xy`` and ``self.k`` to arrays of
        the kept frames sorted by time.

        Parameters
        ----------
        replay_data: list[:class:`~circleparse.Replay.ReplayEvent`]
            A list of :class:`~circleparse.Replay.ReplayEvent` objects,
            representing the actual data of the replay. If the replay could not
            be loaded, this should be ``None``. An empty list is accepted and
            results in empty arrays.

        Notes
        -----
        This method must be called before a replay can be considered loaded
        (ie before you set ``loaded`` to ``True``).
        """
        self.replay_data = replay_data

        # replay wasn't available, can't preprocess the data
        if replay_data is None:
            return

        # remove invalid zero time frame at beginning of replay
        # https://github.com/ppy/osu/blob/1587d4b26fbad691242544a62dbf017a78705ae3/osu.Game/Scoring/Legacy/LegacyScoreDecoder.cs#L242-L245
        # (the length check keeps a replay with zero frames from raising an
        # IndexError)
        if len(replay_data) > 0 and replay_data[0].time_since_previous_action == 0:
            replay_data = replay_data[1:]

        times, xs, ys, keys = [], [], [], []
        running_t = 0
        # negative frame times are valid when they're at the beginning of a
        # replay (they're frames from before the first hitobject at t=0).
        # Count all negative frames until we hit our first positive one, then
        # don't count negative frames after (eg in the middle of the replay).
        positive_seen = False
        for e in replay_data:
            e_t = e.time_since_previous_action

            # lazer ignores frames with negative time, but still adds it to the running time
            # https://github.com/ppy/osu/blob/1587d4b26fbad691242544a62dbf017a78705ae3/osu.Game/Scoring/Legacy/LegacyScoreDecoder.cs#L247-L250
            if e_t < 0:
                if not positive_seen:
                    running_t += e_t
                # negative frames themselves are never kept
                continue
            positive_seen = True

            running_t += e_t
            times.append(running_t)
            xs.append(e.x)
            ys.append(e.y)
            keys.append(e.keys_pressed)

        # build each column with its final dtype directly, rather than via a
        # single mixed block that would push the int columns through float
        t = np.array(times, dtype=int)
        xy = np.array([xs, ys], dtype=float).T
        k = np.array(keys, dtype=int)

        # sort our data by t
        t_sort = np.argsort(t)
        self.t = t[t_sort]
        self.xy = xy[t_sort]
        self.k = k[t_sort]

    def __repr__(self):
        return (f"Replay(timestamp={self.timestamp},map_id={self.map_id},user_id={self.user_id},mods={self.mods},"
                f"replay_id={self.replay_id},weight={self.weight},loaded={self.loaded},username={self.username})")

    def __str__(self):
        return f"Replay by {self.username} on {self.map_id}"
class ReplayMap(Replay):
    """
    A :class:`~.Replay` that was submitted to online servers.

    Parameters
    ----------
    map_id: int
        The id of the map the replay was played on.
    user_id: int
        The id of the player who played the replay.
    mods: ModCombination
        The mods the replay was played with. If ``None``, the
        highest scoring replay of ``user_id`` on ``map_id`` will be loaded,
        regardless of mod combination. Otherwise, the replay with ``mods``
        will be loaded.
    cache: bool
        Whether to cache this replay once it is loaded.
    info: object
        Optional, already retrieved info about this replay. When given, its
        ``timestamp``, ``map_id``, ``user_id``, ``username``, ``replay_id``
        and ``mods`` attributes are copied onto this replay (overriding the
        ``map_id``, ``user_id`` and ``mods`` arguments), and
        :meth:`~ReplayMap.load` reuses it instead of asking the loader for
        the info again.
    """

    def __init__(self, map_id, user_id, mods=None, cache=None, info=None):
        super().__init__(RatelimitWeight.HEAVY, cache)
        self.log = logging.getLogger(__name__ + ".ReplayMap")
        self.info = info
        self.map_id = map_id
        self.user_id = user_id
        self.mods = mods
        if info:
            # the info is authoritative over the plain arguments
            self.map_id = info.map_id
            self.user_id = info.user_id
            self.mods = info.mods
            self.timestamp = info.timestamp
            self.username = info.username
            self.replay_id = info.replay_id

    def load(self, loader, cache):
        """
        Loads the data for this replay from the api.

        Parameters
        ----------
        loader: :class:`~.loader.Loader`
            The :class:`~.loader.Loader` to load this replay with.
        cache: bool
            Whether to cache this replay after loading it. This only has an
            effect if ``self.cache`` is unset (``None``).

        Notes
        -----
        If ``replay.loaded`` is ``True``, this method has no effect.
        ``replay.loaded`` is set to ``True`` after this method is finished.
        """
        self.log.debug("Loading %r", self)
        if self.loaded:
            self.log.debug("%s already loaded, not loading", self)
            return

        # Lower takes precedence: the parent's cache option only counts when
        # we have not set one ourselves.
        if self.cache is not None:
            cache = self.cache

        info = self.info
        if not info:
            # no info was handed to the constructor, so look it up now and
            # fill in what it tells us about the replay
            info = loader.replay_info(self.map_id, user_id=self.user_id, mods=self.mods)
            self.timestamp = info.timestamp
            self.username = info.username
            self.mods = info.mods
            self.replay_id = info.replay_id

        self._process_replay_data(loader.replay_data(info, cache=cache))
        self.loaded = True
        self.log.log(TRACE, "Finished loading %s", self)

    def __eq__(self, loadable):
        """
        Warning
        -------
        Attributes such as ``cache`` are deliberately left out of this
        comparison - equality here means "do they represent the same replay".

        TODO possible false positive if a user overwrites their score inbetween
        loading two otherwise identical replay maps. Similar situation to
        ReplayPath equality. Could equality check replay data instead if both
        are loaded.
        """
        return (isinstance(loadable, ReplayMap)
                and self.map_id == loadable.map_id
                and self.user_id == loadable.user_id
                and self.mods == loadable.mods)

    def __repr__(self):
        if not self.loaded:
            return (f"ReplayMap(map_id={self.map_id},user_id={self.user_id},mods={self.mods},cache={self.cache},"
                    f"loaded={self.loaded})")
        return (f"ReplayMap(timestamp={self.timestamp},map_id={self.map_id},user_id={self.user_id},mods={self.mods},"
                f"cache={self.cache},replay_id={self.replay_id},loaded={self.loaded},username={self.username})")

    def __str__(self):
        return f"{'Loaded' if self.loaded else 'Unloaded'} ReplayMap by {self.user_id} on {self.map_id}"
class ReplayPath(Replay):
    """
    A :class:`~.Replay` saved locally in a ``.osr`` file.

    Parameters
    ----------
    path: str or :class:`os.PathLike`
        The path to the replay file.
    cache: bool
        Whether to cache this replay once it is loaded. Note that currently
        we do not cache :class:`~.ReplayPath` regardless of this parameter.
    """

    def __init__(self, path, cache=None):
        super().__init__(RatelimitWeight.LIGHT, cache)
        self.log = logging.getLogger(__name__ + ".ReplayPath")
        self.path = path
        # beatmap hash read from the osr file; ``None`` until loaded
        self.hash = None

    def load(self, loader, cache):
        """
        Loads the data for this replay from the osr file.

        Parameters
        ----------
        loader: :class:`~.loader.Loader`
            The :class:`~.loader.Loader` to load this replay with. It is used
            to look up the map id and the user id for the replay.
        cache: bool
            Whether to cache this replay after loading it. This only has an
            effect if ``self.cache`` is unset (``None``). Note that currently
            we do not cache :class:`~.ReplayPath` regardless of this parameter.

        Notes
        -----
        If ``replay.loaded`` is ``True``, this method has no effect.
        ``replay.loaded`` is set to ``True`` after this method is finished.
        """
        # Local replays are not cached for now, so ``cache`` goes unused. It
        # stays in the signature for if/when we need it.
        self.log.debug("Loading ReplayPath %r", self)
        if self.loaded:
            self.log.debug("%s already loaded, not loading", self)
            return

        parsed = circleparse.parse_replay_file(self.path)
        beatmap_hash = parsed.beatmap_hash
        player_name = parsed.player_name

        self.timestamp = parsed.timestamp
        self.map_id = loader.map_id(beatmap_hash)
        self.username = player_name
        # TODO make this lazy loaded so we don't waste an api call
        self.user_id = loader.user_id(player_name)
        self.mods = Mod(parsed.mod_combination)
        self.replay_id = parsed.replay_id
        self.hash = beatmap_hash

        self._process_replay_data(parsed.play_data)
        self.loaded = True
        self.log.log(TRACE, "Finished loading %s", self)

    def __eq__(self, loadable):
        """
        Warnings
        --------
        XXX only the paths are compared. Two replays with the same path but
        different replay data (because the file at the path got changed for
        one but not the other) compare equal, even though they do not
        necessarily represent the same replay.

        TODO possible solution - check replay_data equality if both are loaded?
        might be unexpected behavior to some

        ```
        r1 = ReplayPath("./1.osr")
        cg.load(r1)
        # change the file located at ./1.osr to another osr file
        r2 = ReplayPath("./1.osr")
        cg.load(r2)
        r1 == r2 # True, but they contain different replay_data
        ```
        """
        return isinstance(loadable, ReplayPath) and self.path == loadable.path

    def __repr__(self):
        if not self.loaded:
            return f"ReplayPath(path={self.path},weight={self.weight},loaded={self.loaded})"
        return (f"ReplayPath(path={self.path},map_id={self.map_id},user_id={self.user_id},mods={self.mods},"
                f"replay_id={self.replay_id},weight={self.weight},loaded={self.loaded},username={self.username})")

    def __str__(self):
        if not self.loaded:
            return f"Unloaded ReplayPath at {self.path}"
        return f"Loaded ReplayPath by {self.username} on {self.map_id} at {self.path}"