Source code for scrapyd.interfaces
from zope.interface import Attribute, Interface
[docs]
class IEggStorage(Interface):
    """
    A component in which the eggs of projects are stored.
    """

    def put(eggfile, project, version):
        """
        Save ``eggfile`` (a file object) as the egg for the given ``version`` of the ``project``.
        """

    def get(project, version=None):
        """
        Look up the egg matching the ``project`` and ``version``, and return it as ``(version, file)``.

        When ``version`` is ``None``, the latest version and its file are returned.

        When no egg is found, ``(None, None)`` is returned.

        .. tip:: The caller should close the ``file`` once it is done with it.
        """

    def list(project):
        """
        Return every version of the ``project``, ordered so that the latest version comes last.
        """

    def list_projects():
        """
        Return every project that is in storage.

        .. versionadded:: 1.3.0
           Move this logic into the interface and its implementations, to allow customization.
        """

    def delete(project, version=None):
        """
        Remove the egg matching the ``project`` and ``version``.

        The ``project`` itself is deleted once no versions remain.
        """
[docs]
class IPoller(Interface):
    """
    A component that keeps track of the capacity for new jobs, and that starts jobs when ready.
    """

    queues = Attribute(
        """
        An object (like a ``dict``) with a ``__getitem__`` method that accepts a project's name and returns its
        :py:interface:`spider queue<scrapyd.interfaces.ISpiderQueue>` of pending jobs.
        """
    )

    def poll():
        """
        Start jobs, if there is capacity for them. This method is called periodically.
        """

    def next():
        """
        Return the next pending job.

        The return value should be a Deferred, which is fired once there is capacity, or which is returned
        already fired if there is capacity at the time of the call.

        The pending job is a ``dict`` with, at minimum, the ``_project`` name, ``_spider`` name and ``_job`` ID.
        The job ID is unique, at least within the project.

        The pending job is passed on later to :meth:`scrapyd.interfaces.IEnvironment.get_environment`.

        .. seealso:: :meth:`scrapyd.interfaces.ISpiderQueue.pop`
        """

    def update_projects():
        """
        Refresh the available projects. This method is called whenever projects may have changed, including at
        initialization.
        """
[docs]
class ISpiderQueue(Interface):
    """
    A component in which pending jobs are stored.

    The chosen ``ISpiderQueue`` implementation uses ``dict`` keys that must match those of the chosen:

    -  :ref:`launcher` service (which calls :meth:`scrapyd.interfaces.IPoller.next`)
    -  :py:interface:`~scrapyd.interfaces.IEnvironment` implementation (see :meth:`scrapyd.interfaces.IPoller.next`)
    -  :ref:`webservices<config-services>` that schedule, cancel or list pending jobs
    """

    def add(name, priority, **spider_args):
        """
        Enqueue a pending job for the spider ``name`` at the crawl ``priority``.

        Depending on the implementation, the keyword arguments might include the ``_job`` ID, egg ``_version`` and
        Scrapy ``settings``. Any keyword arguments that the implementation does not recognize are treated as
        spider arguments.

        .. versionchanged:: 1.3.0
           Add the ``priority`` parameter.
        """

    def pop():
        """
        Remove the next pending job from the queue and return it.

        The pending job is a ``dict`` that contains the spider ``name``. Depending on the implementation, its other
        keys might include the ``_job`` ID, egg ``_version`` and Scrapy ``settings``. Any keyword arguments that the
        receiver does not recognize are treated as spider arguments.
        """

    def list():
        """
        Return all of the pending jobs.

        .. seealso:: :meth:`scrapyd.interfaces.ISpiderQueue.pop`
        """

    def count():
        """
        Return how many jobs are pending.
        """

    def remove(func):
        """
        Remove each pending job for which ``func(job)`` is true, and return the number of removed pending jobs.
        """

    def clear():
        """
        Remove every pending job.
        """
[docs]
class ISpiderScheduler(Interface):
    """
    A component through which jobs are scheduled.
    """

    def schedule(project, spider_name, priority, **spider_args):
        """
        Schedule a crawl of the spider ``spider_name`` in the ``project``, at the given ``priority``.

        .. versionchanged:: 1.3.0
           Add the ``priority`` parameter.
        """

    def list_projects():
        """
        Return every project that can be scheduled.
        """

    def update_projects():
        """
        Refresh the available projects. This method is called whenever projects may have changed, including at
        initialization.
        """
[docs]
class IEnvironment(Interface):
    """
    A component that generates the environment in which jobs run.

    The chosen ``IEnvironment`` implementation must match the chosen :ref:`launcher` service.
    """

    def get_settings(message):
        """
        Return the Scrapy settings with which to run the process.

        Depending on the chosen :ref:`launcher`, this would be one or more of ``LOG_FILE`` or ``FEEDS``.

        .. versionadded:: 1.4.2
           Support for overriding Scrapy settings via ``SCRAPY_`` environment variables was removed in Scrapy 2.8.

        :param message: the pending job received from the :meth:`scrapyd.interfaces.IPoller.next` method
        """

    def get_environment(message, slot):
        """
        Return the environment variables with which to run the process.

        Depending on the chosen :ref:`launcher`, this would be one or more of ``SCRAPY_PROJECT``,
        ``SCRAPYD_EGG_VERSION`` or ``SCRAPY_SETTINGS_MODULE``.

        :param message: the pending job received from the :meth:`scrapyd.interfaces.IPoller.next` method
        :param slot: the :ref:`launcher` slot for tracking the process
        """
[docs]
class IJobStorage(Interface):
    """
    A component in which finished jobs are stored.

    .. versionadded:: 1.3.0
    """

    def add(job):
        """
        Put a finished ``job`` into the storage.
        """

    def list():
        """
        Return all of the finished jobs.

        .. seealso:: :meth:`scrapyd.interfaces.IJobStorage.__iter__`
        """

    def __len__():
        """
        Return how many finished jobs there are.
        """

    def __iter__():
        """
        Iterate over the finished jobs, in reverse order by ``end_time``.

        Each job has the attributes ``project``, ``spider``, ``job``, ``start_time`` and ``end_time``. It may also
        have the attributes ``args`` (``scrapy crawl`` CLI arguments) and ``env`` (environment variables).
        """