Writing tests and learning promises in node.js

(In which I describe how I did a naive transition to using promises instead of callbacks, and then did something slightly better.)

At the moment, I’m in an under-employed state. Hopefully the consulting will pick up soon, but instead of looking for a real job I’m cleaning up lots of old library code I use (and learning new things like Elixir and Kafka, but that’s a different topic). This post is the second about my work learning to use promises and async/await.

Today I worked on upgrading my code for saving and/or updating bulk docs to CouchDB (couchdb_bulkdoc_appender). As I wrote in my last post, I’m switching from mocha to tap, trying never to use lodash and d3-queue, and trying to use promises or async/await rather than callbacks. Replacing callbacks with promises is tricky because most of my code follows the usual node.js callback idiom. The last thing I want to do is break old code as a result of a stray npm install.

One strategy I’ve been using to migrate to promises is to test the callback function. If it is falsy (null or undefined), then I return a Promise, and if it is true, I run through my usual callback approach. For example, my old code did this:

function saver(docs,next){
    if(next===undefined) next = function(){}

    ///... do async stuff  ...///
    some_async_call(docs,function(e,r)
       if(e) return next(e)
       ... do something ...
       return next(null,final_result)
    })
    return null
}

In other words, if a callback wasn’t passed in, that didn’t mean I was using promises; it meant I wasn’t interested in the response, I would make an empty function to accept the result of the asynchronous function call. The callback was optional, but always was turned into a function.

So now I need to change that. My first instinct is to do a minimal amount of work, something like:

function saver(docs,next){
    const has_callback = typeof next === 'function'
    // define my handler
    const handler = function(r){
        ... do something with r ...
        return final_result
    }
    ///... do async stuff  ...///
    if(has_callback){
        some_async_call(docs
                       ,function(e,r){
                           if(e) return next(e)
                           return next(null,handler(r))
                       })
        return null
    }else{
        return new Promise( (resolve,reject) =>{
            some_async_call(docs
                           ,function(e,r){
                               if(e) return reject(e)
                               return resolve(handler(r))
                           })
            return null
        })
    }
}

Although that preserves functionality for old code, it is pretty ugly, and it seems to me that I’m missing an important point: even though the caller may be using callbacks, this function can stick to promises everywhere. So version two looks more like this:

function saver(docs,next){
    const has_callback = typeof next === 'function'
    // define my handler
    const handler = function(r){
        ... do something with r ...
        return final_result
    }
    ///... do async stuff wrapped in a promise ...///
    const p = new Promise( (resolve,reject) =>{
        some_async_call(docs
                       ,function(e,r){
                           if(e) return reject(e)
                               return resolve(handler(r))
                           })
        return null
    }
    if(has_callback){
        p.then( r =>{
            // invoke the callback
            return next(null,r)
        })
        .catch( e =>{
            // invoke the callback
            return next(e)
        })
        return null
    }else{
        // just return the promise
        return p
    }

That looks like it will work, but I’ve never tried it, because in my case I’m using superagent for my asynchronous call. That means I can capitalize on the fact that superagent is “thenable” as long as you don’t call end()). So I’m doing something like this:

function saver(docs,next){
    const has_callback = typeof next === 'function'
    // define my handler
    const handler = function(r){
        ... do something with r ...
        return final_result
    }
    ///... do async stuff with suparagent ...///
    const req = superagent.post(...)
        .auth(...)
        .send(...)
    // always use then to invoke the handler
    req.then( r=> {
        const final_result = handler(r)
        if(has_callback){
            return next(null,final_result)
        }else{
            return final_result
        }
        return null
    }).catch( e => {
        console.log('caught error',e)
        if(has_callback) return next(e)
        // otherwise, pass the buck
        throw e
    })
    return null
}

This is still promise-rookie programming…more like purgatory than the promised land I guess. But getting there.

But then I did something more clever but also a little dumb that prompted me to write this whole post. Instead of calling the handler inside of the then, I decided to try chaining the callbacks—first triggering the handler, then triggering the part that checks whether to use next or not, like so:

function saver(docs,next){
    const has_callback = typeof next === 'function'
    // define my handler
    const handler = function(r){
        ... do something with r ...
        return final_result
    }
    ///... do async stuff with suparagent ...///
    const req = superagent.post(...)
        .auth(...)
        .send(...)
    // always use then to invoke the handler
    req.then( handler)
    if(has_callback){
            // stack on another then handler
            req.then( final_result => {
                // final_result is output from "handler" above
                return next(null,final_result)
            })
            .catch( e=>{
                return next(e)
            })
            return null
    }else{
        // promise version, just return the promise
        return req
    }
}

The mistake in the above code is my misunderstanding of how “then” works. Then returns a promise, and that’s why it can be chained. I had the impression that the then objects just stack up like donuts. So when I called req.then() once, I had somehow changed req such that when I called req.then() a second time inside the if statement, it was the same as req.then(handler).then( possible callback ). This isn’t true, and my tests were failing.

I see ways to fix this. The first way is to assign the .then to req, as in

req = req.then(handler)
if(has_callback)...

This works fine, but req isn’t a const anymore. The second way I thought of is to always call the second then, and always return the req object, as follows:

return
    req.then( handler)
       .then( final_result => {
            // final_result is output from "handler" above
            if( has_callback) {
                return next(null,final_result)
            }else{
                return final_result
            }
        })
        .catch( e=>{
            if( has_callback) {
                return next(e)
            }else{
                throw e
            }
        })
        return null

That works and passes my tests, but I don’t like it because it seems odd to both return a promise and execute a callback.

I tried pretty hard to use async/await instead of promises, but given I’m trying to preserve the old callback approach for older code, I can’t see how to insert the async call that makes await possible. The only place I was able to do that was in the internal callback.

The final version I have settled on for now is the following:

async function submit_all_docs(opts,keys){
    const cdb = make_cdb_from_opts(opts)
    const uri = cdb+ '/_all_docs?include_docs=true'
    const req = superagent.post(uri)
          .type('json')
          .set('accept','application/json')
    auth_check(req,opts)
    req.send({'keys':keys})
    const r = await req2
    // use await here so that I can return the body, not the response
    return r.body
}

async function submit_bulk_docs(opts,revdocs){
    const cdb = make_cdb_from_opts(opts)
    const uri = cdb+ '/_bulk_docs'
    const req2 = superagent.post(uri)
          .type('json')
          .set('accept','application/json')
    auth_check(req2,opts)
    req2.send({'docs':revdocs})
    const r = await req2
    // use await here so that I can return the body, not the response
    return r.body
}

function make_inner_handler(hash,opts){

    return async (result) => {
        // now add the revisions to the docs and save updates
        var revdocs = []
        result.rows.forEach(function(row){
            if(row.error==='not_found') return hash[row.key]
            if(row.error) throw new Error(row.error)
            // now copy old values
            // after first deleting the old _rev from hash, if any
            delete hash[row.id]._rev
            row.doc = recursive_assign(row.doc,hash[row.id])
            delete hash[row.id]
            revdocs.push(row.doc)
            return null
        })
        _.forEach(hash,function(value,key){
            revdocs.push(value)
            return null
        })
        // now rebuild hash for conflict resolution fn
        revdocs.forEach( doc =>{
            const id = doc.id || doc._id
            hash[id] = doc
        })
        return await submit_bulk_docs(opts,revdocs)
    }
}

function make_conflict_handler(hash,opts,response_handler){
    let looplimit = 10 // prevent long recursion

    const conflict_handler = (results) => {
        if(looplimit-- < 1){
            return results // I quit.
        }

        results.forEach( r =>{
            if( r.ok !== undefined ) {
                delete (hash[r.id])
            }
        })
        const keys = Object.keys(hash)
        if( keys.length > 0){
            // recursive call to get, handle, put
            return submit_all_docs(opts,keys)
                .then( response_handler )
                .then( conflict_handler )
        } else {
            return results
        }
    }
    return conflict_handler
}

function  make_bulkdoc_saver(opts){
    const cdb = make_cdb_from_opts(opts)
    function saver(docs,next){
        const has_callback = typeof next === 'function'

        // passed a block of docs.  need to save them. To do so, first
        // request all of the doc ids, and pick off the current
        // revision number for each
        var hash = {}
        var keys = _.map(docs.docs
                        ,function(doc){
                             hash[doc._id] = doc
                             return doc._id
                        })

        const response_handler = make_inner_handler(hash,opts)
        const conflict_handler = make_conflict_handler(hash,opts,response_handler)
        // start the recursion
        const req = submit_all_docs(opts,keys)
              .then( response_handler )
              .then( conflict_handler )

        if(has_callback){
            req.then( r => {
                return next(null,r)
            })
            .catch( e=>{
                console.log('error handler, callback version')
                return next(e)
            })
            return null
        }else{
            return req
        }
    }
}

The whole shebang is up on github at https://github.com/jmarca/couchdb_bulkdoc_appender.

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s