root/hydranode/hncore/bt/files.h

Revision 2901, 13.5 kB (checked in by madcat, 3 years ago)

Should crash less now during torrent completion/canceling.

Line 
1 /*
2  *  Copyright (C) 2005-2006 Alo Sarv <madcat_@users.sourceforge.net>
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, write to the Free Software
16  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17  */
18
19 /**
20  * \file files.h      Interface for TorrentFile and PartialTorrent classes
21  */
22
23 #ifndef __BT_FILES_H__
24 #define __BT_FILES_H__
25
26 #include <hncore/sharedfile.h>
27 #include <hncore/partdata.h>
28 #include <hncore/hasher.h>
29 #include <hncore/bt/types.h>
30 #include <hncore/bt/torrentinfo.h>
31
32 namespace Bt {
33 class TorrentHasher;
34
35 /**
36  * TorrentFile is a slightly customized SharedFile type, which keeps an internal
37  * list of child objects. Since Torrents can often contain many files, but they
38  * must be handled as a single big file at the protocol level, this class provides
39  * the required interface for reading data from the file. There is no
40  * corresponding physical file for this SharedFile; it's merely a virtual
41  * file.
42  */
43 class TorrentFile : public SharedFile {
44         friend class Torrent;
45 public:
46         /**
47          * Construct TorrentFile from a number of files (and optionally, also
48          * attach a PartialTorrent object, so getPartData() works properly).
49          *
50          * @param files         Files this torrent is composed of, keyed by
51          *                      begin offsets
52          * @param ti            Torrent info (presumably supplies the total size; TODO confirm)
53          * @param pt            PartialTorrent (optional, defaults to null)
54          */
55         TorrentFile(
56                 const std::map<uint64_t, SharedFile*> &files,
57                 TorrentInfo ti, PartialTorrent *pt = 0
58         );
59
60         /**
61          * Override base class read() method; forwards the call to the correct
62          * child object(s).
63          *
64          * @param begin     Begin offset to start reading (inclusive)
65          * @param end       End offset to end reading (inclusive)
66          * @return          The read data
67          *
68          * \throws std::exception if reading fails
69          */
70         virtual std::string read(uint64_t begin, uint64_t end);
71
72         /**
73          * Find out which sub-file contains the specified range. Note that it is
74          * not defined which file will be returned if the range crosses multiple
75          * files. It's not a big problem though, since the chunk sizes in BT are
76          * rather small, and this is called only by Client for the most recent
77          * chunk request.
78          *
79          * @param range       Range to search for
80          * @returns           Sub-file that contains the specified range
81          */
82         SharedFile* getContains(Range64 range) const;
83
84 protected:
85         /**
86          * Overrides base class method; does nothing, since we don't have
87          * anything to move when completing a download.
88          */
89         virtual void finishDownload();
90
91 private:
92         /**
93          * Each sub-file in the torrent occupies a specific range in the
94          * torrent, and is thus kept in a RangeList. This customized Range
95          * object associates a range with a file (m_file may be null).
96          */
97         struct InternalFile : public Range64 {
98                 InternalFile(const Range64 &r) : Range64(r), m_file() {}
99                 InternalFile(uint64_t begin, uint64_t size, SharedFile *f);
100                 InternalFile(uint64_t beg, uint64_t end)
101                 : Range64(beg, end), m_file() {}
102
103                 SharedFile *m_file;
104         };
105
106         //! Children, stored as ranges with an associated file
107         RangeList<InternalFile> m_children;
108
109         //! file -> offset reverse lookups
110         std::map<SharedFile*, uint64_t> m_childrenReverse;
111         typedef std::map<SharedFile*, uint64_t>::iterator RIter;
112
113         /**
114          * \name Event/signal handlers
115          */
116         //!@{
117         void onSharedFileEvent(SharedFile *file, int evt);
118         //!@}
119
120         //! Private destructor: destruction is only allowed by the friend Torrent class
121         ~TorrentFile();
122 };
123
124 /**
125  * PartialTorrent is an object similar to TorrentFile, with the difference of
126  * being a virtual wrapper around a number of PartData objects. It overrides
127  * a number of PartData virtual functions to forward the calls to corresponding
128  * sub-objects, based on offsets.
129  */
130 class PartialTorrent : public PartData {
131         friend class Torrent;
132 public:
133         /**
134          * Construct PartialTorrent from a list of PartData objects
135          *
136          * @param files       Objects to construct from, keyed by begin offsets
137          * @param loc         Location where to store cache files
138          * @param size        Total size of this torrent
139          */
140         PartialTorrent(
141                 const std::map<uint64_t, PartData*> &files,
142                 const boost::filesystem::path &loc, uint64_t size
143         );
144
145         /**
146          * Initializes cache files and the cached ranges listing.
147          *
148          * @param info        Torrent info used for reference data
149          */
150         void initCache(const TorrentInfo &info);
151
152         /**
153          * Overriding base class save() method, this omits writing the .dat file
154          * for the torrent (since it doesn't have one), and saves the cache
155          * files.
156          */
157         virtual void save();
158
159         /**
160          * \name Override a few more base class functions
161          */
162         //!@{
163         virtual void pause();
164         virtual void stop();
165         virtual void resume();
166         virtual void cancel();
167         virtual void allocDiskSpace();
168         virtual std::string getName() const { return m_name; }
169         //!@}
170
171         /**
172          * Find out which sub-file contains the specified range. Note that it is
173          * not defined which file will be returned if the range crosses multiple
174          * files. It's not a big problem though, since the chunk sizes in BT are
175          * rather small, and this is called only by Client for the most recent
176          * chunk request.
177          *
178          * @param range       Range to search for
179          * @returns           Sub-file that contains the specified range
180          */
181         PartData* getContains(Range64 range) const;
182
183         /**
184          * Sets the name of this file. Since the torrent consists of many files,
185          * this must be set externally by the creator of the download.
186          *
187          * \note initCache() function sets this as well (if not already set),
188          *       based on the passed TorrentInfo name.
189          *
190          * @param name      Name to be set.
191          */
192         void setName(const std::string &name) { m_name = name; }
193 protected:
194         /**
195          * Write data to the file
196          *
197          * @param begin         Begin offset to start writing at
198          * @param data          Data to be written
199          *
200          * \throws std::exception if something goes wrong
201          */
202         virtual void doWrite(uint64_t begin, const std::string &data);
203
204         /**
205          * Verify data at the specified range
206          * @param range        Range to be verified
207          * @param ref          Reference hash to check against
208          * @param doSave       Defaults to true; presumably saves state afterwards (TODO confirm)
209          * @returns            The work object posted to workthread
210          */
211         virtual HashWorkPtr verifyRange(
212                 Range64 range, const HashBase *ref, bool doSave = true
213         );
214
215         /**
216          * Called by Detail::Chunk implementation class, indicates that the
217          * specified region of file didn't pass a hash check.
218          *
219          * @param r            Corrupt range
220          */
221         virtual void corruption(Range64 r);
222 private:
223         /**
224          * Each sub-file in the torrent occupies a specific range in the
225          * torrent, and is thus kept in a RangeList. This customized Range
226          * object associates a range with a file (m_file may be null).
227          */
228         struct InternalFile : public Range64 {
229                 InternalFile(const Range64 &r) : Range64(r), m_file() {}
230                 InternalFile(uint64_t beg, uint64_t end)
231                 : Range64(beg, end), m_file() {}
232
233                 InternalFile(uint64_t begin, uint64_t size, PartData *f);
234
235                 PartData *m_file;
236         };
237
238         /**
239          * Implements CacheFile; refer to CacheFile class for documentation.
240          */
241         struct CacheImpl : public Range64 {
242                 CacheImpl(
243                         uint64_t begin, uint64_t end,
244                         const boost::filesystem::path &p
245                 );
246
247                 /**
248                  * Writes data to this cache file
249                  *
250                  * @param begin    Relative offset inside this file
251                  * @param data     Data to be written
252                  */
253                 void write(uint64_t begin, const std::string &data);
254
255                 /**
256                  * Flushes buffer to disk
257                  */
258                 void save();
259
260                 //! @returns the physical location for the cache data
261                 boost::filesystem::path getLocation() const { return m_loc; }
262         private:
263                 //! Physical location on disk
264                 boost::filesystem::path m_loc;
265                 //! Data buffer for non-flushed data
266                 std::map<uint64_t, std::string> m_buffer;
267                 //! Makes life simpler
268                 typedef std::map<uint64_t, std::string>::iterator BIter;
269         };
270
271         /**
272          * CacheFile represents a part of a chunk of a file that crosses file
273          * boundaries. Each file in the chunk has its own CacheFile object.
274          * Each CacheFile is stored on disk as a separate file, to allow
275          * TorrentHasher to work seamlessly across "real" and "cached" files.
276          *
277          * The purpose of this mechanism is to handle gaps in the torrent
278          * (files missing/canceled) cleanly - we always have the cache from which
279          * we can read the missing data in order to perform cross-file hashes.
280          *
281          * The actual machinery is implemented in CacheImpl class, since this
282          * object is held on stack, and stored in m_cache RangeList.
283          */
284         struct CacheFile : public Range64 {
285                 CacheFile(uint64_t r) : Range64(r), m_impl() {}
286                 CacheFile(const Range64 &r) : Range64(r), m_impl() {}
287                 CacheFile(uint64_t beg, uint64_t end)
288                 : Range64(beg, end), m_impl() {}
289                 CacheFile(
290                         uint64_t begin, uint64_t end,
291                         const boost::filesystem::path &p
292                 ) : Range64(begin, end), m_impl(new CacheImpl(begin, end, p)) {}
293
294                 boost::filesystem::path getLocation() const {
295                         return m_impl->getLocation();
296                 }
297
298                 boost::shared_ptr<CacheImpl> m_impl;
299         };
300
301         //! Cache for all data chunks that cross file boundaries
302         RangeList<CacheFile> m_cache;
303
304         //! Children, stored as ranges with an associated file
305         RangeList<InternalFile> m_children;
306
307         //! Map of children, keyed by object, for reverse lookups
308         std::map<PartData*, uint64_t> m_childrenReverse;
309
310         //! Pending hash jobs, waiting for allocations to finish
311         std::list<boost::intrusive_ptr<TorrentHasher> > m_pendingChecks;
312
313         /**
314          * This is true when we're in doWrite() call, false otherwise. The
315          * rationale is that in childDataAdded signal handler we call
316          * setComplete, but we want to call setComplete from doWrite() when
317          * writing from this file. The fundamental problem is actually the
318          * need to call setComplete from updateCache, which is needed since
319          * cache writes don't trigger the childDataAdded signal.
320          *
321          * Thus, the current behaviour is to set this variable to true while
322          * we're in doWrite method, so childDataAdded signal handler won't call
323          * setComplete. When doWrite finishes, it calls setComplete itself.
324          */
325         bool m_writing;
326
327         //! Name of this download
328         std::string m_name;
329
330         /**
331          * @name Iterators for easier usage
332          */
333         //!@{
334         typedef std::map<PartData*, uint64_t>::iterator RIter;
335         //!@}
336
337         //! Private destructor: destruction is only allowed by the friend Torrent class
338         ~PartialTorrent();
339
340         /**
341          * @name Signal handlers
342          */
343         //!@{
344         void childDataAdded(PartData *file, uint64_t offset, uint32_t amount);
345         void childCorruption(PartData *file, Range64 range);
346         bool childCanComplete(PartData *file);
347         void childChunkVerified(PartData *file, uint64_t csz, uint64_t chunk);
348         void childAllocDone(PartData *file);
349         void childPaused(PartData *file);
350         void childResumed(PartData *file);
351         void childDestroyed(PartData *file);
352         void parentChunkVerified(PartData *file, uint64_t csz, uint64_t chunk);
353         //!@}
354
355         /**
356          * Updates cache of cross-file chunks
357          *
358          * @param begin      Global begin offset of input data
359          * @param data       Data that was just written
360          */
361         void updateCache(uint64_t begin, const std::string &data);
362
363         /**
364          * Destroys all temporary 'cache' files. NOTE(review): delTorrent is undocumented.
365          */
366         void cleanCache(bool delTorrent);
367
368         /**
369          * Event handler for cache checksum jobs
370          */
371         void onCacheVerify(HashWorkPtr wrk, HashEvent evt);
372 };
373
374 /**
375  * Customized hasher for torrent files. Since chunks can cross file boundaries
376  * in torrents, we need to read data possibly from an arbitrary number of files,
377  * and concatenate the data together.
378  */
379 class TorrentHasher : public HashWork {
380 public:
381         /**
382          * Construct custom hasher
383          *
384          * @param globalOffsets     Global offsets (relative to torrent file)
385          * @param files             Files from which to read data
386          * @param relativeOffsets   Relative offsets to first/last file being
387          *                          hashed
388          * @param ref               Reference hash
389          */
390         TorrentHasher(
391                 Range64 globalOffsets,
392                 const std::vector<boost::filesystem::path> &files,
393                 std::pair<uint64_t, uint64_t> relativeOffsets,
394                 const HashBase *ref
395         );
396
397         //! Destructor (described as a dummy; defined out of line)
398         ~TorrentHasher();
399
400         /**
401          * Set the list of files that we're waiting for allocation to finish
402          * before running this job. Replaces any previously set list.
403          *
404          * @param wait     List of files to wait on
405          */
406         void waitAlloc(const std::set<PartData*> &wait) { m_waiting = wait; }
407
408         /**
409          * Indicate that an allocation job has finished
410          *
411          * @param f        The file that finished allocating
412          */
413         void allocDone(PartData *f) { m_waiting.erase(f); }
414
415         /**
416          * @returns true if this job can be run now (no allocations pending)
417          */
418         bool canRun() const { return !m_waiting.size(); }
419 protected:
420         /**
421          * Read next data from file
422          *
423          * @param pos      Current position in file
424          * @return         Number of bytes read
425          */
426         virtual uint64_t readNext(uint64_t pos);
427
428         /**
429          * Called when hash work has been finished. Adjusts begin() / end()
430          * variables to be global offsets (from m_globalOffsets member), since
431          * DURING hashing, we need relative offsets, but PartData needs global
432          * offsets in order to find the chunk that needed this hash job, thus
433          * the modification must be done prior to the events being posted.
434          */
435         virtual void finish();
436 private:
437         //! Files for this work
438         std::vector<boost::filesystem::path> m_files;
439
440         //! Iterator for the above vector
441         typedef std::vector<boost::filesystem::path>::iterator Iter;
442
443         //! Current file being hashed (iterator into m_files)
444         Iter m_curFile;
445
446         //! Keeps the global offsets (relative to torrent start)
447         Range64 m_globalOffsets;
448
449         //! File allocations we're waiting for before we can run this job
450         std::set<PartData*> m_waiting;
451 };
452
453 }
454
455 #endif
Note: See TracBrowser for help on using the browser.