This repository has been archived by the owner on Aug 12, 2020. It is now read-only.

Commit

chore: use raw nodes for leaf data
go uses `raw` unixfs nodes for leaf data whereas this module uses `file` nodes, which causes CIDs to differ between the two implementations
achingbrain committed Jun 12, 2018
1 parent 0af9bf4 commit 9d44a75
Showing 6 changed files with 141 additions and 17 deletions.
5 changes: 3 additions & 2 deletions README.md
@@ -5,7 +5,7 @@ IPFS unixFS Engine
[![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/)
[![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](http://webchat.freenode.net/?channels=%23ipfs)
[![standard-readme compliant](https://img.shields.io/badge/standard--readme-OK-green.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme)
[![Build Status](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine.svg?style=flat-square)](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine)
[![Build Status](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine.svg?branch=master&style=flat-square)](https://travis-ci.org/ipfs/js-ipfs-unixfs-engine)
[![Coverage Status](https://coveralls.io/repos/github/ipfs/js-ipfs-unixfs-engine/badge.svg?branch=master)](https://coveralls.io/github/ipfs/js-ipfs-unixfs-engine?branch=master)
[![Dependency Status](https://david-dm.org/ipfs/js-ipfs-unixfs-engine.svg?style=flat-square)](https://david-dm.org/ipfs/js-ipfs-unixfs-engine)
[![js-standard-style](https://img.shields.io/badge/code%20style-standard-brightgreen.svg?style=flat-square)](https://github.com/feross/standard)
@@ -141,14 +141,15 @@ The input's file paths and directory structure will be preserved in the [`dag-pb`
- `trickle`: builds [a trickle tree](https://github.com/ipfs/specs/pull/57#issuecomment-265205384)
- `maxChildrenPerNode` (positive integer, defaults to `174`): the maximum children per node for the `balanced` and `trickle` DAG builder strategies
- `layerRepeat` (positive integer, defaults to 4): (only applicable to the `trickle` DAG builder strategy). The maximum repetition of parent nodes for each layer of the tree.
- `reduceSingleLeafToSelf` (boolean, defaults to `false`): optimization that, when a reduction is left with a single node, reduces the file to that node
- `reduceSingleLeafToSelf` (boolean, defaults to `true`): optimization that, when a reduction is left with a single node, reduces the file to that node
- `dirBuilder` (object): the options for the directory builder
- `hamt` (object): the options for the HAMT sharded directory builder
- bits (positive integer, defaults to `8`): the number of bits at each bucket of the HAMT
- `progress` (function): a function that will be called with the byte length of chunks as a file is added to ipfs.
- `onlyHash` (boolean, defaults to false): Only chunk and hash - do not write to disk
- `hashAlg` (string): multihash hashing algorithm to use
- `cidVersion` (integer, default 0): the CID version to use when storing the data (storage keys are based on the CID, _including_ its version)
- `rawLeafNodes` (boolean, defaults to false): when a file would span multiple DAGNodes, if this is true the leaf nodes will be marked as `raw` `unixfs` nodes (see the usage sketch below)
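
For context, a minimal usage sketch of the new `rawLeafNodes` option follows. It assumes the importer is exposed as `require('ipfs-unixfs-engine').importer` and that an `ipld` resolver instance already exists; the path and buffer size are illustrative only:

```js
const pull = require('pull-stream')
const importer = require('ipfs-unixfs-engine').importer
// `ipld` is assumed to be an existing IPLD resolver instance, as in the tests in this commit

pull(
  // a file bigger than one 262144-byte chunk, so the importer creates leaf nodes
  pull.once({
    path: '/foo/large-file.bin',
    content: Buffer.alloc(262144 + 5).fill(1)
  }),
  // with rawLeafNodes: true the leaves are marked as `raw` unixfs nodes,
  // matching what the go implementation produces
  importer(ipld, { rawLeafNodes: true }),
  pull.collect((err, files) => {
    if (err) throw err
    files.forEach(file => console.log(file.path, file.multihash))
  })
)
```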

### Exporter

6 changes: 4 additions & 2 deletions src/builder/builder.js
@@ -16,7 +16,8 @@ const DAGNode = dagPB.DAGNode
const defaultOptions = {
chunkerOptions: {
maxChunkSize: 262144
}
},
rawLeafNodes: false
}

module.exports = function (createChunker, ipld, createReducer, _options) {
@@ -96,6 +97,7 @@ module.exports = function (createChunker, ipld, createReducer, _options) {

let previous
let count = 0
const leafType = options.rawLeafNodes ? 'raw' : 'file'

pull(
file.content,
@@ -106,7 +108,7 @@
}
return Buffer.from(chunk)
}),
pull.map(buffer => new UnixFS('file', buffer)),
pull.map(buffer => new UnixFS(leafType, buffer)),
pull.asyncMap((fileNode, callback) => {
DAGNode.create(fileNode.marshal(), [], options.hashAlg, (err, node) => {
callback(err, { DAGNode: node, fileNode: fileNode })
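
To show what the `leafType` switch above changes, here is a small standalone sketch that builds one leaf each way with `ipfs-unixfs` and `ipld-dag-pb`; the buffer contents and the `sha2-256` hash algorithm are illustrative assumptions, not part of this diff:

```js
const UnixFS = require('ipfs-unixfs')
const dagPB = require('ipld-dag-pb')
const DAGNode = dagPB.DAGNode

const chunk = Buffer.from('hello world') // stand-in for a chunk produced by the chunker

// rawLeafNodes: false keeps the previous behaviour: leaves are 'file' unixfs nodes
const fileLeaf = new UnixFS('file', chunk)
// rawLeafNodes: true marks leaves as 'raw' unixfs nodes, which serialize differently
// and therefore produce different multihashes/CIDs
const rawLeaf = new UnixFS('raw', chunk)

DAGNode.create(fileLeaf.marshal(), [], 'sha2-256', (err, node) => {
  if (err) throw err
  console.log('file leaf:', node.multihash)
})

DAGNode.create(rawLeaf.marshal(), [], 'sha2-256', (err, node) => {
  if (err) throw err
  console.log('raw leaf:', node.multihash)
})
```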
2 changes: 1 addition & 1 deletion src/builder/index.js
@@ -13,7 +13,7 @@ const reducers = {
const defaultOptions = {
strategy: 'balanced',
highWaterMark: 100,
reduceSingleLeafToSelf: false
reduceSingleLeafToSelf: true
}

module.exports = function (Chunker, ipld, _options) {
55 changes: 44 additions & 11 deletions src/builder/reduce.js
@@ -10,19 +10,52 @@ const DAGNode = dagPB.DAGNode

module.exports = function (file, ipld, options) {
return function (leaves, callback) {
if (leaves.length === 1 && (leaves[0].single || options.reduceSingleLeafToSelf)) {
const leave = leaves[0]
callback(null, {
path: file.path,
multihash: leave.multihash,
size: leave.size,
leafSize: leave.leafSize,
name: leave.name
})
return // early
if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) {
const leaf = leaves[0]

if (!options.rawLeafNodes) {
return callback(null, {
path: file.path,
multihash: leaf.multihash,
size: leaf.size,
leafSize: leaf.leafSize,
name: leaf.name
})
}

// we are using raw leaf nodes, this file only has one node but it'll be marked raw
// so convert it back to a file node
return waterfall([
(cb) => ipld.get(new CID(leaf.multihash), cb),
(result, cb) => {
const meta = UnixFS.unmarshal(result.value.data)
const fileNode = new UnixFS('file', meta.data)

DAGNode.create(fileNode.marshal(), [], options.hashAlg, (err, node) => {
cb(err, { DAGNode: node, fileNode: fileNode })
})
},
(result, cb) => {
let cid = new CID(result.DAGNode.multihash)

if (options.cidVersion === 1) {
cid = cid.toV1()
}

ipld.put(result.DAGNode, { cid }, (err) => cb(err, result))
},
(result, cb) => {
cb(null, {
multihash: result.DAGNode.multihash,
size: result.DAGNode.size,
leafSize: result.fileNode.fileSize(),
name: ''
})
}
], callback)
}

// create a parent node and add all the leafs
// create a parent node and add all the leaves
const f = new UnixFS('file')

const links = leaves.map((leaf) => {
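
The `cidVersion` branch above re-keys the freshly created node before it is put back into IPLD. A short sketch of that conversion with the `cids` module, where `multihash` stands in for whatever `DAGNode.create` returned:

```js
const CID = require('cids')

// `multihash` is assumed to be the Buffer found on a newly created DAGNode,
// exactly as used in the reducer above
function cidForNode (multihash, cidVersion) {
  let cid = new CID(multihash) // a bare dag-pb multihash yields a CIDv0

  if (cidVersion === 1) {
    cid = cid.toV1() // same multihash, re-expressed as a CIDv1
  }

  return cid
}

// usage, mirroring the reducer: cidForNode(result.DAGNode.multihash, options.cidVersion)
```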
3 changes: 2 additions & 1 deletion src/importer/index.js
@@ -14,7 +14,8 @@ const chunkers = {
}

const defaultOptions = {
chunker: 'fixed'
chunker: 'fixed',
rawLeafNodes: false
}

module.exports = function (ipld, _options) {
87 changes: 87 additions & 0 deletions test/importer.js
@@ -15,6 +15,9 @@ const CID = require('cids')
const Ipld = require('ipld')
const loadFixture = require('aegir/fixtures')
const each = require('async/each')
const waterfall = require('async/waterfall')
const parallel = require('async/parallel')
const UnixFs = require('ipfs-unixfs')

function stringifyMh (files) {
return files.map((file) => {
@@ -104,7 +107,67 @@ const strategyOverrides = {
size: 2669627
}
}
}

const checkLeafNodeTypes = (ipld, options, expected, done) => {
waterfall([
(cb) => pull(
pull.once({
path: '/foo',
content: Buffer.alloc(262144 + 5).fill(1)
}),
importer(ipld, options),
pull.collect(cb)
),
(files, cb) => ipld.get(new CID(files[0].multihash), cb),
(result, cb) => {
const node = result.value
const meta = UnixFs.unmarshal(node.data)

expect(meta.type).to.equal('file')
expect(node.links.length).to.equal(2)

parallel(
node.links.map(link => {
return (done) => {
waterfall([
(next) => ipld.get(new CID(link.multihash), next),
(result, next) => {
const node = result.value
const meta = UnixFs.unmarshal(node.data)

expect(meta.type).to.equal(expected)

next()
}
], done)
}
}), cb)
}
], done)
}

const checkNodeLinks = (ipld, options, expected, done) => {
waterfall([
(cb) => pull(
pull.once({
path: '/foo',
content: Buffer.alloc(100).fill(1)
}),
importer(ipld, options),
pull.collect(cb)
),
(files, cb) => ipld.get(new CID(files[0].multihash), cb),
(result, cb) => {
const node = result.value
const meta = UnixFs.unmarshal(node.data)

expect(meta.type).to.equal('file')
expect(node.links.length).to.equal(expected)

cb()
}
], done)
}

module.exports = (repo) => {
@@ -517,6 +580,30 @@ module.exports = (repo) => {
pull.collect(onCollected)
)
})

it('imports file with raw leaf nodes when specified', (done) => {
checkLeafNodeTypes(ipld, {
rawLeafNodes: true
}, 'raw', done)
})

it('imports file with file leaf nodes when specified', (done) => {
checkLeafNodeTypes(ipld, {
rawLeafNodes: false
}, 'file', done)
})

it('reduces file to single node when specified', (done) => {
checkNodeLinks(ipld, {
reduceSingleLeafToSelf: true
}, 0, done)
})

it('does not reduce file to single node when overridden by options', (done) => {
checkNodeLinks(ipld, {
reduceSingleLeafToSelf: false
}, 1, done)
})
})
})
}
