Source code for scrapyd.interfaces

from zope.interface import Attribute, Interface


class IEggStorage(Interface):
    """
    A component to store project eggs.
    """

    def put(eggfile, project, version):
        """
        Store the egg (a file object), which represents a ``version`` of the ``project``.
        """

    def get(project, version=None):
        """
        Return ``(version, file)`` for the egg matching the ``project`` and ``version``.

        If ``version`` is ``None``, the latest version and corresponding file are returned.

        If no egg is found, ``(None, None)`` is returned.

        .. tip:: Remember to close the ``file`` when done.
        """

    def list(project):
        """
        Return all versions of the ``project`` in order, with the latest version last.
        """

    def list_projects():
        """
        Return all projects in storage.

        .. versionadded:: 1.3.0
           Move this logic into the interface and its implementations, to allow customization.
        """

    def delete(project, version=None):
        """
        Delete the egg matching the ``project`` and ``version``. Delete the ``project``, if no versions remain.
        """
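# --- Example (not part of scrapyd) ---
# A minimal sketch of an in-memory IEggStorage implementation, to illustrate
# the contract above. The class name and storage layout are hypothetical;
# scrapyd's own implementation persists eggs to disk and orders versions more
# carefully than insertion order.

from io import BytesIO

from zope.interface import implementer


@implementer(IEggStorage)
class MemoryEggStorage:
    def __init__(self):
        self.eggs = {}  # {project: {version: egg bytes}}

    def put(self, eggfile, project, version):
        self.eggs.setdefault(project, {})[version] = eggfile.read()

    def get(self, project, version=None):
        versions = self.eggs.get(project, {})
        if not versions:
            return None, None
        if version is None:
            version = list(versions)[-1]  # assume the latest was inserted last
        data = versions.get(version)
        if data is None:
            return None, None
        return version, BytesIO(data)  # the caller is expected to close this

    def list(self, project):
        return list(self.eggs.get(project, {}))

    def list_projects(self):
        return list(self.eggs)

    def delete(self, project, version=None):
        if version is None:
            self.eggs.pop(project, None)
        else:
            self.eggs.get(project, {}).pop(version, None)
            if not self.eggs.get(project):
                self.eggs.pop(project, None)  # drop the project if no versions remain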
class IPoller(Interface):
    """
    A component that tracks capacity for new jobs, and starts jobs when ready.
    """

    queues = Attribute(
        """
        An object (like a ``dict``) with a ``__getitem__`` method that accepts a project's name and returns its
        :py:interface:`spider queue<scrapyd.interfaces.ISpiderQueue>` of pending jobs.
        """
    )

    def poll():
        """
        Called periodically to start jobs if there's capacity.
        """

    def next():
        """
        Return the next pending job.

        It should return a Deferred that will be fired when there's capacity, or already fired if there's capacity.

        The pending job is a ``dict`` containing at least the ``_project`` name, ``_spider`` name and ``_job`` ID.
        The job ID is unique, at least within the project.

        The pending job is later passed to :meth:`scrapyd.interfaces.IEnvironment.get_environment`.

        .. seealso:: :meth:`scrapyd.interfaces.ISpiderQueue.pop`
        """

    def update_projects():
        """
        Called when projects may have changed, to refresh the available projects, including at initialization.
        """
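# --- Example (not part of scrapyd) ---
# A sketch of an IPoller, loosely modeled on scrapyd's queue-based poller.
# ``get_spider_queues`` is a hypothetical callable returning a dict of
# {project: ISpiderQueue}, and the queues are assumed to pop synchronously.

from twisted.internet.defer import Deferred
from zope.interface import implementer


@implementer(IPoller)
class SimplePoller:
    def __init__(self, get_spider_queues):
        self._get_spider_queues = get_spider_queues
        self.queues = {}
        self._waiting = []  # Deferreds returned by next(), i.e. available capacity
        self.update_projects()

    def poll(self):
        # Called periodically (e.g. from a twisted LoopingCall). If the
        # launcher is waiting for a job and a queue has one, fire the Deferred.
        if not self._waiting:
            return
        for project, queue in self.queues.items():
            message = queue.pop()
            if message is not None:
                # Translate queue keys into the keys promised by next();
                # ``_job`` is assumed to be among the queued keyword arguments.
                message["_project"] = project
                message["_spider"] = message.pop("name")
                self._waiting.pop(0).callback(message)
                return

    def next(self):
        d = Deferred()
        self._waiting.append(d)
        return d

    def update_projects(self):
        self.queues = self._get_spider_queues()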
class ISpiderQueue(Interface):
    """
    A component to store pending jobs.

    The ``dict`` keys used by the chosen ``ISpiderQueue`` implementation must match the chosen:

    -  :ref:`launcher` service (which calls :meth:`scrapyd.interfaces.IPoller.next`)
    -  :py:interface:`~scrapyd.interfaces.IEnvironment` implementation (see :meth:`scrapyd.interfaces.IPoller.next`)
    -  :ref:`webservices<config-services>` that schedule, cancel or list pending jobs
    """

    def add(name, priority, **spider_args):
        """
        Add a pending job, given the spider ``name``, crawl ``priority`` and keyword arguments, which might include
        the ``_job`` ID, egg ``_version`` and Scrapy ``settings`` depending on the implementation, with keyword
        arguments that are not recognized by the implementation being treated as spider arguments.

        .. versionchanged:: 1.3.0
           Add the ``priority`` parameter.
        """

    def pop():
        """
        Pop the next pending job.

        The pending job is a ``dict`` containing the spider ``name``. Depending on the implementation, other keys
        might include the ``_job`` ID, egg ``_version`` and Scrapy ``settings``, with keyword arguments that are not
        recognized by the receiver being treated as spider arguments.
        """

    def list():
        """
        Return the pending jobs.

        .. seealso:: :meth:`scrapyd.interfaces.ISpiderQueue.pop`
        """

    def count():
        """
        Return the number of pending jobs.
        """

    def remove(func):
        """
        Remove pending jobs for which ``func(job)`` is true, and return the number of removed pending jobs.
        """

    def clear():
        """
        Remove all pending jobs.
        """
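# --- Example (not part of scrapyd) ---
# A sketch of an in-memory, priority-ordered ISpiderQueue. It is hypothetical;
# scrapyd's default queue persists pending jobs in SQLite instead.

from heapq import heapify, heappop, heappush
from itertools import count as _counter

from zope.interface import implementer


@implementer(ISpiderQueue)
class MemorySpiderQueue:
    def __init__(self):
        self._heap = []  # entries are (-priority, insertion index, job dict)
        self._index = _counter()

    def add(self, name, priority=0.0, **spider_args):
        job = dict(spider_args, name=name)
        heappush(self._heap, (-priority, next(self._index), job))

    def pop(self):
        if not self._heap:
            return None
        return heappop(self._heap)[2]

    def list(self):
        return [job for _, _, job in sorted(self._heap)]

    def count(self):
        return len(self._heap)

    def remove(self, func):
        kept = [entry for entry in self._heap if not func(entry[2])]
        removed = len(self._heap) - len(kept)
        self._heap = kept
        heapify(self._heap)  # restore the heap invariant after filtering
        return removed

    def clear(self):
        self._heap = []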
class ISpiderScheduler(Interface):
    """
    A component to schedule jobs.
    """

    def schedule(project, spider_name, priority, **spider_args):
        """
        Schedule a crawl.

        .. versionchanged:: 1.3.0
           Add the ``priority`` parameter.
        """

    def list_projects():
        """
        Return all projects that can be scheduled.
        """

    def update_projects():
        """
        Called when projects may have changed, to refresh the available projects, including at initialization.
        """
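# --- Example (not part of scrapyd) ---
# A sketch of an ISpiderScheduler that simply delegates to the project's
# spider queue, which mirrors how scrapyd schedules crawls.
# ``get_spider_queues`` is the same hypothetical helper as in the poller sketch.

from zope.interface import implementer


@implementer(ISpiderScheduler)
class SimpleScheduler:
    def __init__(self, get_spider_queues):
        self._get_spider_queues = get_spider_queues
        self.queues = {}
        self.update_projects()

    def schedule(self, project, spider_name, priority=0.0, **spider_args):
        self.queues[project].add(spider_name, priority=priority, **spider_args)

    def list_projects(self):
        return list(self.queues)

    def update_projects(self):
        self.queues = self._get_spider_queues()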
class IEnvironment(Interface):
    """
    A component to generate the environment of jobs.

    The chosen ``IEnvironment`` implementation must match the chosen :ref:`launcher` service.
    """

    def get_settings(message):
        """
        Return the Scrapy settings to use for running the process.

        Depending on the chosen :ref:`launcher`, this would be one or more of ``LOG_FILE`` or ``FEEDS``.

        .. versionadded:: 1.4.2
           Support for overriding Scrapy settings via ``SCRAPY_`` environment variables was removed in Scrapy 2.8.

        :param message: the pending job received from the :meth:`scrapyd.interfaces.IPoller.next` method
        """

    def get_environment(message, slot):
        """
        Return the environment variables to use for running the process.

        Depending on the chosen :ref:`launcher`, this would be one or more of ``SCRAPY_PROJECT``,
        ``SCRAPYD_EGG_VERSION`` or ``SCRAPY_SETTINGS_MODULE``.

        :param message: the pending job received from the :meth:`scrapyd.interfaces.IPoller.next` method
        :param slot: the :ref:`launcher` slot for tracking the process
        """
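# --- Example (not part of scrapyd) ---
# A sketch of an IEnvironment that sets the settings and variables named in
# the docstrings above. The log path layout and ``logs_dir`` parameter are
# assumptions, not scrapyd's actual configuration.

import os

from zope.interface import implementer


@implementer(IEnvironment)
class SimpleEnvironment:
    def __init__(self, logs_dir="logs"):
        self.logs_dir = logs_dir

    def get_settings(self, message):
        # Write each job's log to logs_dir/<project>/<spider>/<job>.log.
        log_file = os.path.join(
            self.logs_dir, message["_project"], message["_spider"], f"{message['_job']}.log"
        )
        return {"LOG_FILE": log_file}

    def get_environment(self, message, slot):
        env = os.environ.copy()
        env["SCRAPY_PROJECT"] = message["_project"]
        if "_version" in message:
            env["SCRAPYD_EGG_VERSION"] = message["_version"]
        return env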
class IJobStorage(Interface):
    """
    A component to store finished jobs.

    .. versionadded:: 1.3.0
    """

    def add(job):
        """
        Add a finished job to the storage.
        """

    def list():
        """
        Return the finished jobs.

        .. seealso:: :meth:`scrapyd.interfaces.IJobStorage.__iter__`
        """

    def __len__():
        """
        Return the number of finished jobs.
        """

    def __iter__():
        """
        Iterate over the finished jobs in reverse order by ``end_time``.

        A job has the attributes ``project``, ``spider``, ``job``, ``start_time`` and ``end_time`` and may have the
        attributes ``args`` (``scrapy crawl`` CLI arguments) and ``env`` (environment variables).
        """
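# --- Example (not part of scrapyd) ---
# A sketch of an in-memory IJobStorage. ``job`` is assumed to be any object
# with the attributes listed in ``__iter__`` above; scrapyd also offers a
# SQLite-backed storage for keeping finished jobs across restarts.

from zope.interface import implementer


@implementer(IJobStorage)
class MemoryJobStorage:
    def __init__(self):
        self.jobs = []

    def add(self, job):
        self.jobs.append(job)

    def list(self):
        return list(iter(self))

    def __len__(self):
        return len(self.jobs)

    def __iter__(self):
        # Most recently finished jobs first.
        return iter(sorted(self.jobs, key=lambda job: job.end_time, reverse=True))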