/*
 * Copyright (c) 2012-2013, 2015, 2017, 2019 ARM Limited
 * All rights reserved.
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder.  You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2004-2005 The Regents of The University of Michigan
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Ali Saidi
 *          Nathan Binkert
 *          Andreas Sandberg
 */

#ifndef __DEV_DMA_DEVICE_HH__
#define __DEV_DMA_DEVICE_HH__

#include <deque>
#include <memory>

#include "base/circlebuf.hh"
#include "dev/io_device.hh"
#include "params/DmaDevice.hh"
#include "sim/drain.hh"
#include "sim/system.hh"

class ClockedObject;

class DmaPort : public MasterPort, public Drainable
{
  private:

    /**
     * Take the first packet of the transmit list and attempt to send
     * it as a timing request. If it is successful, schedule the
     * sending of the next packet; otherwise, remember that we are
     * waiting for a retry.
     */
    void trySendTimingReq();

    /**
     * For timing, attempt to send the first item on the transmit
     * list, and if it is successful and there are more packets
     * waiting, then schedule the sending of the next packet. For
     * atomic, simply send and process everything on the transmit
     * list.
     */
    void sendDma();

    /**
     * Handle a response packet by updating the corresponding DMA
     * request state to reflect the bytes received, and also update
     * the pending request counter. If the DMA request that this
     * packet is part of is complete, then signal the completion event
     * if present, potentially with a delay added to it.
     *
     * @param pkt Response packet to handle
     * @param delay Additional delay for scheduling the completion event
     */
    void handleResp(PacketPtr pkt, Tick delay = 0);

    struct DmaReqState : public Packet::SenderState
    {
        /** Event to call on the device when this transaction (all
         * packets) completes. */
        Event *completionEvent;

        /** Total number of bytes that this transaction involves. */
        const Addr totBytes;

        /** Number of bytes that have been acked for this transaction. */
        Addr numBytes;

        /** Amount by which to delay completion of the DMA. */
        const Tick delay;

        DmaReqState(Event *ce, Addr tb, Tick _delay)
            : completionEvent(ce), totBytes(tb), numBytes(0), delay(_delay)
        {}
    };

  public:
    /** The device that owns this port. */
    ClockedObject *const device;

    /** The system that the device/port belong to. This is used to
     * determine which mode we are currently operating in. */
    System *const sys;

    /** Id for all requests. */
    const MasterID masterId;

  protected:
    /** Use a deque as we never insert or remove in the middle. */
    std::deque<PacketPtr> transmitList;

    /** Event used to schedule a future sending from the transmit list. */
    EventFunctionWrapper sendEvent;

    /** Number of outstanding packets the DMA port has in flight. */
    uint32_t pendingCount;

    /** Whether the port is currently waiting for a retry before it
     * can send the next packet. */
    bool inRetry;

    /** Default streamId. */
    const uint32_t defaultSid;

    /** Default substreamId. */
    const uint32_t defaultSSid;

  protected:

    bool recvTimingResp(PacketPtr pkt) override;
    void recvReqRetry() override;

    void queueDma(PacketPtr pkt);

  public:

    DmaPort(ClockedObject *dev, System *s,
            uint32_t sid = 0, uint32_t ssid = 0);

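    /**
     * Initiate a DMA transaction.
     *
     * The transfer may be broken into multiple packets; if a
     * completion event is provided, it is scheduled once responses
     * for all of them have arrived, with the given delay added.
     *
     * @param cmd Memory command to use (e.g., MemCmd::ReadReq).
     * @param addr Physical start address of the transfer.
     * @param size Size of the transfer in bytes.
     * @param event Optional completion event (nullptr for none).
     * @param data Buffer to read from or write into.
     * @param delay Delay, in ticks, added to the completion event.
     * @param flag Additional request flags.
     */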
    RequestPtr
    dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
              uint8_t *data, Tick delay, Request::Flags flag = 0);

    RequestPtr
    dmaAction(Packet::Command cmd, Addr addr, int size, Event *event,
              uint8_t *data, uint32_t sid, uint32_t ssid, Tick delay,
              Request::Flags flag = 0);

    bool dmaPending() const { return pendingCount > 0; }

    DrainState drain() override;
};

class DmaDevice : public PioDevice
{
  protected:
    DmaPort dmaPort;

  public:
    typedef DmaDeviceParams Params;
    DmaDevice(const Params *p);
    virtual ~DmaDevice() { }

    void dmaWrite(Addr addr, int size, Event *event, uint8_t *data,
                  uint32_t sid, uint32_t ssid, Tick delay = 0)
    {
        dmaPort.dmaAction(MemCmd::WriteReq, addr, size, event, data,
                          sid, ssid, delay);
    }

    void dmaWrite(Addr addr, int size, Event *event, uint8_t *data,
                  Tick delay = 0)
    {
        dmaPort.dmaAction(MemCmd::WriteReq, addr, size, event, data, delay);
    }

    void dmaRead(Addr addr, int size, Event *event, uint8_t *data,
                 uint32_t sid, uint32_t ssid, Tick delay = 0)
    {
        dmaPort.dmaAction(MemCmd::ReadReq, addr, size, event, data,
                          sid, ssid, delay);
    }

    void dmaRead(Addr addr, int size, Event *event, uint8_t *data,
                 Tick delay = 0)
    {
        dmaPort.dmaAction(MemCmd::ReadReq, addr, size, event, data, delay);
    }

    bool dmaPending() const { return dmaPort.dmaPending(); }

    void init() override;

    unsigned int cacheBlockSize() const { return sys->cacheLineSize(); }

    Port &getPort(const std::string &if_name,
                  PortID idx=InvalidPortID) override;

};
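
/*
 * Example use of the dmaRead()/dmaWrite() helpers above. This is a
 * minimal, hypothetical sketch (MyDevice, readDone() and the buffer
 * members are illustrative, not part of this header):
 *
 * \code{.cpp}
 *     class MyDevice : public DmaDevice
 *     {
 *       public:
 *         MyDevice(const Params *p)
 *             : DmaDevice(p), buffer(4096),
 *               readDoneEvent([this]{ readDone(); }, name())
 *         {}
 *
 *         void startRead(Addr src)
 *         {
 *             // readDoneEvent fires once responses for all packets
 *             // of this transfer have arrived.
 *             dmaRead(src, buffer.size(), &readDoneEvent, buffer.data());
 *         }
 *
 *       private:
 *         void readDone() { /* consume the data in buffer *\/ }
 *
 *         std::vector<uint8_t> buffer;
 *         EventFunctionWrapper readDoneEvent;
 *     };
 * \endcode
 */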

/**
 * DMA callback class.
 *
 * Allows one to register for a callback event after a sequence of
 * (potentially non-contiguous) DMA transfers on a DmaPort completes.
 * Derived classes must implement the process() method and use
 * getChunkEvent() to allocate a callback event for each participating
 * DMA request. A sketch of a derived class is shown after the class
 * definition below.
 */
class DmaCallback : public Drainable
{
  public:
    virtual const std::string name() const { return "DmaCallback"; }

    /**
     * DmaPort ensures that all outstanding DMA accesses have completed
     * before it finishes draining.  However, DmaChunkEvents scheduled
     * with a delay might still be sitting on the event queue.
     * Therefore, draining is not complete until count is 0, which
     * ensures that all outstanding DmaChunkEvents associated with this
     * DmaCallback have fired.
     */
    DrainState drain() override
    {
        return count ? DrainState::Draining : DrainState::Drained;
    }

  protected:
    int count;

    DmaCallback()
        : count(0)
    { }

    virtual ~DmaCallback() { }

    /**
     * Callback function invoked on completion of all chunks.
     */
    virtual void process() = 0;

  private:
    /**
     * Called by the DMA engine completion event on each chunk
     * completion.  Since the object may delete itself here, callers
     * should not use the object pointer after calling this function.
     */
    void chunkComplete()
    {
        if (--count == 0) {
            process();
            // Need to notify DrainManager that this object is finished
            // draining, even though it is immediately deleted.
            signalDrainDone();
            delete this;
        }
    }

  public:

    /**
     * Request a chunk event.  Chunk events should be provided to each
     * DMA request that wishes to participate in this DmaCallback.
     */
    Event *getChunkEvent()
    {
        ++count;
        return new EventFunctionWrapper([this]{ chunkComplete(); }, name(),
                                        true);
    }
};
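
/*
 * A minimal, hypothetical sketch of a DmaCallback user
 * (FreeBufferCallback and the surrounding names are illustrative,
 * not part of this header):
 *
 * \code{.cpp}
 *     class FreeBufferCallback : public DmaCallback
 *     {
 *       public:
 *         FreeBufferCallback(uint8_t *buf) : buffer(buf) {}
 *
 *       private:
 *         // Called once all chunk events have fired; the base class
 *         // then deletes this object.
 *         void process() override { delete[] buffer; }
 *
 *         uint8_t *buffer;
 *     };
 *
 *     // Two non-contiguous writes sharing one callback:
 *     DmaCallback *cb = new FreeBufferCallback(buffer);
 *     port.dmaAction(MemCmd::WriteReq, addr_a, len_a,
 *                    cb->getChunkEvent(), buffer, 0);
 *     port.dmaAction(MemCmd::WriteReq, addr_b, len_b,
 *                    cb->getChunkEvent(), buffer + len_a, 0);
 * \endcode
 */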

/**
 * Buffered DMA engine helper class
 *
 * This class implements a simple DMA engine that feeds a FIFO
 * buffer. The size of the buffer, the maximum number of pending
 * requests and the maximum request size are all set when the engine
 * is instantiated.
 *
 * An <i>asynchronous</i> transfer of a <i>block</i> of data
 * (designated by a start address and a size) is started by calling
 * the startFill() method. The DMA engine will aggressively try to
 * keep the internal FIFO full. As soon as there is room in the FIFO
 * for more data <i>and</i> there is a free request slot, a new fill
 * will be started.
 *
 * Data in the FIFO can be read back using the get() and tryGet()
 * methods. Both request a block of data from the FIFO. However, get()
 * panics if the block cannot be satisfied, while tryGet() simply
 * returns false. The latter call makes it possible to implement
 * custom buffer underrun handling.
 *
 * A simple use case would be something like this:
 * \code{.cpp}
 *     // Create a DMA engine with a 1KiB buffer. Issue up to 8 concurrent
 *     // uncacheable 64 byte (maximum) requests.
 *     DmaReadFifo *dma = new DmaReadFifo(port, 1024, 64, 8,
 *                                        Request::UNCACHEABLE);
 *
 *     // Start copying 4KiB of data from 0xFF000000
 *     dma->startFill(0xFF000000, 0x1000);
 *
 *     // Some time later, when there is data in the FIFO:
 *     uint8_t data[8];
 *     dma->get(data, sizeof(data));
 * \endcode
 *
 * The DMA engine allows a new block to be requested as soon as the
 * last request for the current block has been sent (i.e., there is no
 * need to wait for pending requests to complete). This can be queried
 * with the atEndOfBlock() method, and more advanced implementations
 * may override the onEndOfBlock() callback to chain transfers, as in
 * the sketch below.
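 *
 * \code{.cpp}
 *     // Hypothetical subclass that chains a list of regions
 *     // (ScatterReader and 'regions' are illustrative only):
 *     class ScatterReader : public DmaReadFifo
 *     {
 *       public:
 *         using DmaReadFifo::DmaReadFifo;
 *
 *       protected:
 *         void onEndOfBlock() override
 *         {
 *             // Legal here: the last request of the current block
 *             // has already been sent.
 *             if (!regions.empty()) {
 *                 startFill(regions.front().first, regions.front().second);
 *                 regions.pop_front();
 *             }
 *         }
 *
 *       private:
 *         std::deque<std::pair<Addr, size_t>> regions;
 *     };
 * \endcode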
 */
class DmaReadFifo : public Drainable, public Serializable
{
  public:
    DmaReadFifo(DmaPort &port, size_t size,
                unsigned max_req_size,
                unsigned max_pending,
                Request::Flags flags = 0);

    ~DmaReadFifo();

  public: // Serializable
    void serialize(CheckpointOut &cp) const override;
    void unserialize(CheckpointIn &cp) override;

  public: // Drainable
    DrainState drain() override;

  public: // FIFO access
    /**
     * @{
     * @name FIFO access
     */
    /**
     * Try to read data from the FIFO.
     *
     * This method reads len bytes of data from the FIFO and stores
     * them in the memory location pointed to by dst. The method
     * fails, and no data is written to the buffer, if the FIFO
     * doesn't contain enough data to satisfy the request.
     *
     * @param dst Pointer to a destination buffer
     * @param len Amount of data to read.
     * @return true on success, false otherwise.
     */
    bool tryGet(uint8_t *dst, size_t len);

    template<typename T>
    bool tryGet(T &value) {
        return tryGet(reinterpret_cast<uint8_t *>(&value), sizeof(T));
    }
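
    // A small usage sketch for the typed overload ('fifo' and
    // handleUnderrun() are hypothetical):
    //
    // \code{.cpp}
    //     uint32_t desc;
    //     if (!fifo.tryGet(desc))
    //         handleUnderrun();
    // \endcode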

    /**
     * Read data from the FIFO and panic on failure.
     *
     * @see tryGet()
     *
     * @param dst Pointer to a destination buffer
     * @param len Amount of data to read.
     */
    void get(uint8_t *dst, size_t len);

    template<typename T>
    T get() {
        T value;
        get(reinterpret_cast<uint8_t *>(&value), sizeof(T));
        return value;
    }

    /** Get the amount of data stored in the FIFO */
    size_t size() const { return buffer.size(); }
    /** Flush the FIFO */
    void flush() { buffer.flush(); }

    /** @} */
  public: // FIFO fill control
    /**
     * @{
     * @name FIFO fill control
     */
    /**
     * Start filling the FIFO.
     *
     * @warning It is considered an error to call startFill() on an
     * active DMA engine unless the last request from the active block
     * has been sent (i.e., atEndOfBlock() is true).
     *
     * @param start Physical address to copy from.
     * @param size Size of the block to copy.
     */
    void startFill(Addr start, size_t size);

    /**
     * Stop the DMA engine.
     *
     * Stop filling the FIFO and ignore incoming responses for pending
     * requests. The onEndOfBlock() callback will not be called after
     * this method has been invoked. However, once the last response
     * has been received, the onIdle() callback will still be called.
     */
    void stopFill();

    /**
     * Has the DMA engine sent out the last request for the active
     * block?
     */
    bool atEndOfBlock() const {
        return nextAddr == endAddr;
    }

    /**
     * Is the DMA engine active (i.e., are there still in-flight
     * accesses)?
     */
    bool isActive() const {
        return !(pendingRequests.empty() && atEndOfBlock());
    }

    /** @} */
  protected: // Callbacks
    /**
     * @{
     * @name Callbacks
     */
    /**
     * End of block callback
     *
     * This callback is called <i>once</i> after the last access in a
     * block has been sent. It is legal for a derived class to call
     * startFill() from this method to initiate a new transfer.
     */
    virtual void onEndOfBlock() {}

    /**
     * Last response received callback
     *
     * This callback is called when the DMA engine becomes idle (i.e.,
     * there are no pending requests).
     *
     * It is possible for a DMA engine to reach the end of a block and
     * become idle in the same tick. In such a case, the onEndOfBlock()
     * callback will be called first. This callback will <i>NOT</i> be
     * called if onEndOfBlock() initiates a new DMA transfer.
     */
    virtual void onIdle() {}

    /** @} */
  private: // Configuration
    /** Maximum request size in bytes */
    const Addr maxReqSize;
    /** Maximum FIFO size in bytes */
    const size_t fifoSize;
    /** Request flags */
    const Request::Flags reqFlags;

    DmaPort &port;

  private:
    class DmaDoneEvent : public Event
    {
      public:
        DmaDoneEvent(DmaReadFifo *_parent, size_t max_size);

        void kill();
        void cancel();
        bool canceled() const { return _canceled; }
        void reset(size_t size);
        void process();

        bool done() const { return _done; }
        size_t requestSize() const { return _requestSize; }
        const uint8_t *data() const { return _data.data(); }
        uint8_t *data() { return _data.data(); }

      private:
        DmaReadFifo *parent;
        bool _done;
        bool _canceled;
        size_t _requestSize;
        std::vector<uint8_t> _data;
    };

    typedef std::unique_ptr<DmaDoneEvent> DmaDoneEventUPtr;

    /**
     * DMA request done: handle the incoming data and issue a new
     * request.
     */
    void dmaDone();

    /** Handle pending requests that have been flagged as done. */
    void handlePending();

    /** Try to issue new DMA requests, or bypass DMA requests. */
    void resumeFill();

    /** Try to issue new DMA requests during normal execution. */
    void resumeFillTiming();

    /** Try to bypass DMA requests in KVM execution mode. */
    void resumeFillFunctional();

  private: // Internal state
    Fifo<uint8_t> buffer;

    Addr nextAddr;
    Addr endAddr;

    std::deque<DmaDoneEventUPtr> pendingRequests;
    std::deque<DmaDoneEventUPtr> freeRequests;
};

#endif // __DEV_DMA_DEVICE_HH__