diff --git a/README.md b/README.md
index 0c2cf676..00fc157b 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ IPFS unixFS Engine
 [![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/)
 [![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](http://webchat.freenode.net/?channels=%23ipfs)
 [![standard-readme compliant](https://img.shields.io/badge/standard--readme-OK-green.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme)
-[![Build Status](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine.svg?style=flat-square)](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine)
+[![Build Status](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine.svg?branch=master&style=flat-square)](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine)
 [![Coverage Status](https://coveralls.io/repos/github/ipfs/js-ipfs-unixfs-engine/badge.svg?branch=master)](https://coveralls.io/github/ipfs/js-ipfs-unixfs-engine?branch=master)
 [![Dependency Status](https://david-dm.org/ipfs/js-ipfs-unixfs-engine.svg?style=flat-square)](https://david-dm.org/ipfs/js-ipfs-unixfs-engine)
 [![js-standard-style](https://img.shields.io/badge/code%20style-standard-brightgreen.svg?style=flat-square)](https://github.com/feross/standard)
@@ -141,7 +141,7 @@ The input's file paths and directory structure will be preserved in the [`dag-pb
   - `trickle`: builds [a trickle tree](https://github.com/ipfs/specs/pull/57#issuecomment-265205384)
 - `maxChildrenPerNode` (positive integer, defaults to `174`): the maximum children per node for the `balanced` and `trickle` DAG builder strategies
 - `layerRepeat` (positive integer, defaults to 4): (only applicable to the `trickle` DAG builder strategy). The maximum repetition of parent nodes for each layer of the tree.
-- `reduceSingleLeafToSelf` (boolean, defaults to `false`): optimization for, when reducing a set of nodes with one node, reduce it to that node. 
+- `reduceSingleLeafToSelf` (boolean, defaults to `true`): optimization for, when reducing a set of nodes with one node, reduce it to that node.
 - `dirBuilder` (object): the options for the directory builder
   - `hamt` (object): the options for the HAMT sharded directory builder
     - bits (positive integer, defaults to `8`): the number of bits at each bucket of the HAMT
@@ -149,6 +149,7 @@ The input's file paths and directory structure will be preserved in the [`dag-pb
 - `onlyHash` (boolean, defaults to false): Only chunk and hash - do not write to disk
 - `hashAlg` (string): multihash hashing algorithm to use
 - `cidVersion` (integer, default 0): the CID version to use when storing the data (storage keys are based on the CID, _including_ it's version)
+- `rawLeafNodes` (boolean, defaults to false): When a file would span multiple DAGNodes, if this is true the leaf nodes will be marked as `raw` `unixfs` nodes
 
 ### Exporter
 
diff --git a/src/builder/builder.js b/src/builder/builder.js
index 2f5e3188..c2e0bc09 100644
--- a/src/builder/builder.js
+++ b/src/builder/builder.js
@@ -16,7 +16,8 @@ const DAGNode = dagPB.DAGNode
 const defaultOptions = {
   chunkerOptions: {
     maxChunkSize: 262144
-  }
+  },
+  rawLeafNodes: false
 }
 
 module.exports = function (createChunker, ipld, createReducer, _options) {
@@ -96,6 +97,7 @@ module.exports = function (createChunker, ipld, createReducer, _options) {
 
   let previous
   let count = 0
+  const leafType = options.rawLeafNodes ? 'raw' : 'file'
 
   pull(
     file.content,
@@ -106,7 +108,7 @@ module.exports = function (createChunker, ipld, createReducer, _options) {
       }
       return Buffer.from(chunk)
     }),
-    pull.map(buffer => new UnixFS('file', buffer)),
+    pull.map(buffer => new UnixFS(leafType, buffer)),
     pull.asyncMap((fileNode, callback) => {
       DAGNode.create(fileNode.marshal(), [], options.hashAlg, (err, node) => {
         callback(err, { DAGNode: node, fileNode: fileNode })
diff --git a/src/builder/index.js b/src/builder/index.js
index 7f50391a..b4948405 100644
--- a/src/builder/index.js
+++ b/src/builder/index.js
@@ -13,7 +13,7 @@ const reducers = {
 const defaultOptions = {
   strategy: 'balanced',
   highWaterMark: 100,
-  reduceSingleLeafToSelf: false
+  reduceSingleLeafToSelf: true
 }
 
 module.exports = function (Chunker, ipld, _options) {
diff --git a/src/builder/reduce.js b/src/builder/reduce.js
index ed3092d8..de2584e6 100644
--- a/src/builder/reduce.js
+++ b/src/builder/reduce.js
@@ -10,19 +10,52 @@ const DAGNode = dagPB.DAGNode
 
 module.exports = function (file, ipld, options) {
   return function (leaves, callback) {
-    if (leaves.length === 1 && (leaves[0].single || options.reduceSingleLeafToSelf)) {
-      const leave = leaves[0]
-      callback(null, {
-        path: file.path,
-        multihash: leave.multihash,
-        size: leave.size,
-        leafSize: leave.leafSize,
-        name: leave.name
-      })
-      return // early
+    if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) {
+      const leaf = leaves[0]
+
+      if (!options.rawLeafNodes) {
+        return callback(null, {
+          path: file.path,
+          multihash: leaf.multihash,
+          size: leaf.size,
+          leafSize: leaf.leafSize,
+          name: leaf.name
+        })
+      }
+
+      // we are using raw leaf nodes, this file only has one node but it'll be marked raw
+      // so convert it back to a file node
+      return waterfall([
+        (cb) => ipld.get(new CID(leaf.multihash), cb),
+        (result, cb) => {
+          const meta = UnixFS.unmarshal(result.value.data)
+          const fileNode = new UnixFS('file', meta.data)
+
+          DAGNode.create(fileNode.marshal(), [], options.hashAlg, (err, node) => {
+            cb(err, { DAGNode: node, fileNode: fileNode })
+          })
+        },
+        (result, cb) => {
+          let cid = new CID(result.DAGNode.multihash)
+
+          if (options.cidVersion === 1) {
+            cid = cid.toV1()
+          }
+
+          ipld.put(result.DAGNode, { cid }, (err) => cb(err, result))
+        },
+        (result, cb) => {
+          cb(null, {
+            multihash: result.DAGNode.multihash,
+            size: result.DAGNode.size,
+            leafSize: result.fileNode.fileSize(),
+            name: ''
+          })
+        }
+      ], callback)
     }
 
-    // create a parent node and add all the leafs
+    // create a parent node and add all the leaves
     const f = new UnixFS('file')
     const links = leaves.map((leaf) => {
diff --git a/src/importer/index.js b/src/importer/index.js
index bbe9cca3..d9433b2b 100644
--- a/src/importer/index.js
+++ b/src/importer/index.js
@@ -14,7 +14,8 @@ const chunkers = {
 }
 
 const defaultOptions = {
-  chunker: 'fixed'
+  chunker: 'fixed',
+  rawLeafNodes: false
 }
 
 module.exports = function (ipld, _options) {
diff --git a/test/importer.js b/test/importer.js
index f5b1edba..9ed78925 100644
--- a/test/importer.js
+++ b/test/importer.js
@@ -15,6 +15,9 @@ const CID = require('cids')
 const Ipld = require('ipld')
 const loadFixture = require('aegir/fixtures')
 const each = require('async/each')
+const waterfall = require('async/waterfall')
+const parallel = require('async/parallel')
+const UnixFs = require('ipfs-unixfs')
 
 function stringifyMh (files) {
   return files.map((file) => {
@@ -104,7 +107,67 @@ const strategyOverrides = {
       size: 2669627
     }
   }
+}
+
+const checkLeafNodeTypes = (ipld, options, expected, done) => {
+  waterfall([
+    (cb) => pull(
+      pull.once({
+        path: '/foo',
+        content: Buffer.alloc(262144 + 5).fill(1)
+      }),
+      importer(ipld, options),
+      pull.collect(cb)
+    ),
+    (files, cb) => ipld.get(new CID(files[0].multihash), cb),
+    (result, cb) => {
+      const node = result.value
+      const meta = UnixFs.unmarshal(node.data)
+
+      expect(meta.type).to.equal('file')
+      expect(node.links.length).to.equal(2)
+
+      parallel(
+        node.links.map(link => {
+          return (done) => {
+            waterfall([
+              (next) => ipld.get(new CID(link.multihash), next),
+              (result, next) => {
+                const node = result.value
+                const meta = UnixFs.unmarshal(node.data)
+
+                expect(meta.type).to.equal(expected)
+
+                next()
+              }
+            ], done)
+          }
+        }), cb)
+    }
+  ], done)
+}
+
+const checkNodeLinks = (ipld, options, expected, done) => {
+  waterfall([
+    (cb) => pull(
+      pull.once({
+        path: '/foo',
+        content: Buffer.alloc(100).fill(1)
+      }),
+      importer(ipld, options),
+      pull.collect(cb)
+    ),
+    (files, cb) => ipld.get(new CID(files[0].multihash), cb),
+    (result, cb) => {
+      const node = result.value
+      const meta = UnixFs.unmarshal(node.data)
+
+      expect(meta.type).to.equal('file')
+      expect(node.links.length).to.equal(expected)
+
+      cb()
+    }
+  ], done)
 }
 
 module.exports = (repo) => {
@@ -517,6 +580,30 @@ module.exports = (repo) => {
         pull.collect(onCollected)
       )
     })
+
+    it('imports file with raw leaf nodes when specified', (done) => {
+      checkLeafNodeTypes(ipld, {
+        rawLeafNodes: true
+      }, 'raw', done)
+    })
+
+    it('imports file with file leaf nodes when specified', (done) => {
+      checkLeafNodeTypes(ipld, {
+        rawLeafNodes: false
+      }, 'file', done)
+    })
+
+    it('reduces file to single node when specified', (done) => {
+      checkNodeLinks(ipld, {
+        reduceSingleLeafToSelf: true
+      }, 0, done)
+    })
+
+    it('does not reduce file to single node when overridden by options', (done) => {
+      checkNodeLinks(ipld, {
+        reduceSingleLeafToSelf: false
+      }, 1, done)
+    })
   })
 })