# HG changeset patch # User Mahlon E. Smith # Date 1432064812 25200 # Node ID 755c3645e3dc11d56a998599406c8a2536e15d47 Initial commit of a TNetstring parser and serializer for the Nim programming language. diff -r 000000000000 -r 755c3645e3dc .hgignore --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.hgignore Tue May 19 12:46:52 2015 -0700 @@ -0,0 +1,4 @@ +syntax: glob +.cache +tnetstring +*.html diff -r 000000000000 -r 755c3645e3dc Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Makefile Tue May 19 12:46:52 2015 -0700 @@ -0,0 +1,25 @@ + +FILES = tnetstring.nim + +default: development + +debug: ${FILES} + nim --assertions:on --nimcache:.cache c ${FILES} + +development: ${FILES} + # can use gdb with this... + nim -r --debugInfo --linedir:on --define:testing --nimcache:.cache c ${FILES} + +debugger: ${FILES} + nim --debugger:on --nimcache:.cache c ${FILES} + +release: ${FILES} + nim -d:release --opt:speed --nimcache:.cache c ${FILES} + +docs: + nim doc ${FILES} + #nim buildIndex ${FILES} + +clean: + cat .hgignore | xargs rm -rf + diff -r 000000000000 -r 755c3645e3dc tnetstring.nim --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tnetstring.nim Tue May 19 12:46:52 2015 -0700 @@ -0,0 +1,646 @@ +# +# Copyright (c) 2015, Mahlon E. Smith +# All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# * Neither the name of Mahlon E. Smith nor the names of his +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +## This module implements a simple TNetstring parser and serializer. +## TNetString stands for "tagged netstring" and is a modification of Dan +## Bernstein's netstrings specification. TNetstrings allow for the same data +## structures as JSON but in a format that is resistant to buffer overflows +## and backward compatible with original netstrings. They make no assumptions +## about string contents, allowing for easy transmission of binary data mixed +## with strongly typed values. + +## See http://cr.yp.to/proto/netstrings.txt and http://tnetstrings.org/ for additional information. +## +## This module borrows heavily (in both usage and code) from the nim JSON stdlib +## (json.nim) -- (c) Copyright 2015 Andreas Rumpf, Dominik Picheta. +## +## Usage example: +## +## .. 
code-block:: nim +## +## let +## tnetstr = "52:4:test,3:1.3^4:key2,4:true!6:things,12:1:1#1:2#1:3#]}" +## tnetobj = parse_tnetstring( tnetstr ) +## +## # tnetobj is now equivalent to the structure: +## # @[(key: test, val: 1.3), (key: key2, val: true), (key: things, val: @[1, 2, 3])] +## +## assert ( tnetobj.kind == TNetstringObject ) +## echo tnetobj[ "test" ] +## echo tnetobj[ "key2" ] +## for item in tnetobj[ "things" ]: +## echo item +## +## Results in: +## +## .. code-block:: nim +## +## 1.3 +## true +## 1 +## 2 +## 3 +## +## This module can also be used to reasonably create a serialized +## TNetstring, suitable for network transmission: +## +## .. code-block:: nim +## +## let +## number = 1000 +## list = @[ "thing1", "thing2" ] +## tnettop = newTNetstringArray() # top-level array +## tnetsub = newTNetstringArray() # sub array +## +## tnettop.add( newTNetstringInt(number) ) +## for item in list: +## tnetsub.add( newTNetstringString(item) ) +## tnettop.add( tnetsub ) +## +## # Equivalent to: @[1000, @[thing1, thing2]] +## echo dump_tnetstring( tnettop ) +## +## Results in: +## +## .. 
##   code-block:: nim
##
##     29:4:1000#18:6:thing1,6:thing2,]]
##

import
  hashes,
  parseutils,
  strutils

const version = "0.1.0"

type
  TNetstringKind* = enum     ## enumeration of all valid types
    TNetstringString,        ## a string literal
    TNetstringInt,           ## an integer literal
    TNetstringFloat,         ## a float literal
    TNetstringBool,          ## a ``true`` or ``false`` value
    TNetstringNull,          ## the value ``null``
    TNetstringObject,        ## an object: the ``}`` token
    TNetstringArray          ## an array: the ``]`` token

  TNetstringNode* = ref TNetstringNodeObj
  TNetstringNodeObj* {.acyclic.} = object
    extra: string            # unparsed data that followed this node (parser internal)
    case kind*: TNetstringKind
    of TNetstringString:
      str*: string
    of TNetstringInt:
      num*: BiggestInt
    of TNetstringFloat:
      fnum*: float
    of TNetstringBool:
      bval*: bool
    of TNetstringNull:
      nil
    of TNetstringObject:
      fields*: seq[ tuple[key: string, val: TNetstringNode] ]
    of TNetstringArray:
      elems*: seq[ TNetstringNode ]

  TNetstringParseError* = object of ValueError ## Raised for a TNetstring error


proc raiseParseErr*( t: TNetstringNode, msg: string ) {.noinline, noreturn.} =
  ## Raises a `TNetstringParseError` exception carrying `msg`.
  ## `t` is accepted for interface compatibility and is not used.
  raise newException( TNetstringParseError, msg )


# The constructors below use object construction expressions rather than
# `new` followed by an assignment to `kind`: changing the discriminator of
# an already allocated variant object is rejected by current Nim versions.

proc newTNetstringString*( s: string ): TNetstringNode =
  ## Create a new String typed TNetstringNode holding `s`.
  result = TNetstringNode( kind: TNetstringString, str: s )


proc newTNetstringInt*( i: BiggestInt ): TNetstringNode =
  ## Create a new Integer typed TNetstringNode holding `i`.
  result = TNetstringNode( kind: TNetstringInt, num: i )


proc newTNetstringFloat*( f: float ): TNetstringNode =
  ## Create a new Float typed TNetstringNode holding `f`.
  result = TNetstringNode( kind: TNetstringFloat, fnum: f )


proc newTNetstringBool*( b: bool ): TNetstringNode =
  ## Create a new Boolean typed TNetstringNode holding `b`.
  result = TNetstringNode( kind: TNetstringBool, bval: b )


proc newTNetstringNull*(): TNetstringNode =
  ## Create a new nil typed TNetstringNode.
  result = TNetstringNode( kind: TNetstringNull )


proc newTNetstringObject*(): TNetstringNode =
  ## Create a new, empty Object typed TNetstringNode.
  result = TNetstringNode( kind: TNetstringObject, fields: @[] )


proc newTNetstringArray*(): TNetstringNode =
  ## Create a new, empty Array typed TNetstringNode.
  result = TNetstringNode( kind: TNetstringArray, elems: @[] )


proc parse_tnetstring*( data: string ): TNetstringNode =
  ## Given an encoded tnetstring, parse and return a TNetstringNode.
  ##
  ## Only the first value found in `data` is returned; anything following
  ## it is kept internally so container parsing can continue from there.
  ## Empty arrays (``0:]``) and empty objects (``0:}``) are accepted.
  ##
  ## Raises `TNetstringParseError` when the separator is missing, the
  ## length prefix is not a number or does not fit the data, a numeric
  ## payload cannot be converted, a null has a payload, an object has a
  ## non-string key or a key without a value, or the type tag is unknown.
  let sep_pos = data.skipUntil( ':' )
  if sep_pos == data.len: raiseParseErr( result, "Invalid data: No separator token found." )

  var length: int
  try:
    length = data.substr( 0, sep_pos - 1 ).parseInt
  except ValueError:
    raiseParseErr( result, getCurrentExceptionMsg() )

  # After the separator there must be `length` payload bytes plus one type
  # tag.  Checked explicitly (and phrased so it cannot overflow) instead of
  # relying on index errors, which are unavailable when bound checks are off.
  if length < 0 or length > data.len - sep_pos - 2:
    raiseParseErr( result, "Invalid data: Length does not match the available payload." )

  let
    kind    = data[ sep_pos + length + 1 ]
    payload = data.substr( sep_pos + 1, sep_pos + length )
    extra   = data.substr( sep_pos + length + 2 )

  case kind:
    of ',':
      result = newTNetstringString( payload )

    of '#':
      try:
        result = newTNetstringInt( payload.parseBiggestInt )
      except ValueError:
        raiseParseErr( result, getCurrentExceptionMsg() )

    of '^':
      try:
        result = newTNetstringFloat( payload.parseFloat )
      except ValueError:
        raiseParseErr( result, getCurrentExceptionMsg() )

    of '!':
      # Anything other than the literal "true" is treated as false.
      result = newTNetstringBool( payload == "true" )

    of '~':
      if length != 0: raiseParseErr( result, "Invalid data: Payload must be 0 length for null." )
      result = newTNetstringNull()

    of ']':
      result = newTNetstringArray()
      var rest = payload
      while rest != "":
        let subnode = parse_tnetstring( rest )
        rest = subnode.extra
        subnode.extra = ""   # consumed; don't keep the remainder alive per element
        result.elems.add( subnode )

    of '}':
      result = newTNetstringObject()
      var rest = payload
      while rest != "":
        let key = parse_tnetstring( rest )
        if ( key.extra == "" ): raiseParseErr( result, "Invalid data: Unbalanced tuple." )
        if ( key.kind != TNetstringString ): raiseParseErr( result, "Invalid data: Object keys must be strings." )

        let value = parse_tnetstring( key.extra )
        rest = value.extra
        value.extra = ""     # consumed; don't keep the remainder alive per value
        result.fields.add( (key: key.str, val: value) )

    else:
      raiseParseErr( result, "Invalid data: Unknown tnetstring type '$1'." % $kind )

  result.extra = extra


iterator items*( node: TNetstringNode ): TNetstringNode =
  ## Iterator for the items of `node`. `node` has to be a TNetstringArray.
  assert node.kind == TNetstringArray
  for i in items( node.elems ):
    yield i


iterator mitems*( node: var TNetstringNode ): var TNetstringNode =
  ## Iterator for the items of `node`. `node` has to be a TNetstringArray. Items can be
  ## modified.
  assert node.kind == TNetstringArray
  for i in mitems( node.elems ):
    yield i


iterator pairs*( node: TNetstringNode ): tuple[ key: string, val: TNetstringNode ] =
  ## Iterator for the child elements of `node`. `node` has to be a TNetstringObject.
  assert node.kind == TNetstringObject
  for key, val in items( node.fields ):
    yield ( key, val )


iterator mpairs*( node: var TNetstringNode ): var tuple[ key: string, val: TNetstringNode ] =
  ## Iterator for the child elements of `node`. `node` has to be a TNetstringObject.
  ## Items can be modified.
  assert node.kind == TNetstringObject
  for keyVal in mitems( node.fields ):
    yield keyVal


proc `$`*( node: TNetstringNode ): string =
  ## Delegate stringification of `TNetstringNode` to its underlying value.
  ## A null node is rendered as ``(nil)``.
  return case node.kind:
    of TNetstringString:
      $node.str
    of TNetstringInt:
      $node.num
    of TNetstringFloat:
      $node.fnum
    of TNetstringBool:
      $node.bval
    of TNetstringNull:
      "(nil)"
    of TNetstringArray:
      $node.elems
    of TNetstringObject:
      $node.fields


proc `==`* ( a, b: TNetstringNode ): bool =
  ## Check two TNetstring nodes for equality.
  ##
  ## Two nil nodes are equal; a nil node never equals a non-nil one.
  ## Nodes of different kinds are never equal.  Objects compare their
  ## (key, value) sequences, so field order matters.
  if a.isNil or b.isNil:
    return a.isNil and b.isNil
  if a.kind != b.kind:
    return false

  return case a.kind
    of TNetstringString:
      a.str == b.str
    of TNetstringInt:
      a.num == b.num
    of TNetstringFloat:
      a.fnum == b.fnum
    of TNetstringBool:
      a.bval == b.bval
    of TNetstringNull:
      true
    of TNetstringArray:
      a.elems == b.elems
    of TNetstringObject:
      a.fields == b.fields


proc copy*( node: TNetstringNode ): TNetstringNode =
  ## Perform a deep copy of TNetstringNode: containers are rebuilt and
  ## every child is copied recursively.
  case node.kind
  of TNetstringString:
    result = newTNetstringString( node.str )
  of TNetstringInt:
    result = newTNetstringInt( node.num )
  of TNetstringFloat:
    result = newTNetstringFloat( node.fnum )
  of TNetstringBool:
    result = newTNetstringBool( node.bval )
  of TNetstringNull:
    result = newTNetstringNull()
  of TNetstringArray:
    result = newTNetstringArray()
    for item in items( node ):
      result.elems.add( copy(item) )
  of TNetstringObject:
    result = newTNetstringObject()
    for key, value in items( node.fields ):
      result.fields.add( (key: key, val: copy(value)) )

  result.extra = node.extra


proc delete*( node: TNetstringNode, key: string ) =
  ## Deletes ``node[key]`` preserving the order of the other (key, value)-pairs.
  ## Only the first pair matching `key` is removed.
  ## Raises `IndexError` if `key` is not present in the object.
  assert( node.kind == TNetstringObject )
  for i in 0 .. node.fields.len - 1:
    if node.fields[i].key == key:
      node.fields.delete( i )
      return
  raise newException( IndexError, "key not in object" )


proc hash*( node: TNetstringNode ): Hash =
  ## Compute the hash for a TNetstringNode, based on its kind and value.
  ## Arrays and objects hash all of their children; every null node hashes
  ## like the integer 0.
  return case node.kind
    of TNetstringString:
      hash( node.str )
    of TNetstringInt:
      hash( node.num )
    of TNetstringFloat:
      hash( node.fnum )
    of TNetstringBool:
      hash( node.bval.int )
    of TNetstringNull:
      hash( 0 )
    of TNetstringArray:
      hash( node.elems )
    of TNetstringObject:
      hash( node.fields )


proc len*( node: TNetstringNode ): int =
  ## If `node` is a `TNetstringArray`, it returns the number of elements.
  ## If `node` is a `TNetstringObject`, it returns the number of pairs.
  ## If `node` is a `TNetstringString`, it returns strlen.
  ## Else it returns 0.
  return case node.kind
    of TNetstringString:
      node.str.len
    of TNetstringArray:
      node.elems.len
    of TNetstringObject:
      node.fields.len
    else:
      0


proc `[]`*( node: TNetstringNode, name: string ): TNetstringNode =
  ## Gets a field from a `TNetstringNode`, which must not be nil and must
  ## be a `TNetstringObject`.  The first pair whose key equals `name` wins.
  ## If the value at `name` does not exist, returns nil.
  assert( not isNil(node) )
  assert( node.kind == TNetstringObject )
  for key, item in node:
    if key == name:
      return item
  return nil


proc `[]`*( node: TNetstringNode, index: int ): TNetstringNode =
  ## Gets the node at `index` in an Array. Result is undefined if `index`
  ## is out of bounds.
  assert( not isNil(node) )
  assert( node.kind == TNetstringArray )
  return node.elems[ index ]


proc hasKey*( node: TNetstringNode, key: string ): bool =
  ## Checks if `key` exists in the TNetstringObject `node`.
  assert( node.kind == TNetstringObject )
  for k, item in items( node.fields ):
    if k == key: return true
  return false


proc add*( parent, child: TNetstringNode ) =
  ## Appends `child` to a TNetstringArray node `parent`.
  assert( parent.kind == TNetstringArray )
  parent.elems.add( child )


proc add*( node: TNetstringNode, key: string, val: TNetstringNode ) =
  ## Adds ``(key, val)`` pair to the TNetstringObject `node`.
  ## For speed reasons no check for duplicate keys is performed.
  ## (Note, ``[]=`` performs the check.)
  assert( node.kind == TNetstringObject )
  node.fields.add( (key: key, val: val) )


proc `[]=`*( node: TNetstringNode, index: int, val: TNetstringNode ) =
  ## Sets an index for a `TNetstringArray`.  `index` must already exist.
  assert( node.kind == TNetstringArray )
  node.elems[ index ] = val


proc `[]=`*( node: TNetstringNode, key: string, val: TNetstringNode ) =
  ## Sets a field from a `TNetstringObject`. Performs a check for duplicate keys:
  ## an existing key has its value replaced, otherwise the pair is appended.
  assert( node.kind == TNetstringObject )
  for i in 0 .. node.fields.len - 1:
    if node.fields[i].key == key:
      node.fields[i].val = val
      return
  node.fields.add( (key: key, val: val) )


proc dump_tnetstring*( node: TNetstringNode ): string =
  ## Renders a TNetstring `node` as a regular (serialized) string.
  ## Containers serialize all children first, then prefix the total length.
  case node.kind
  of TNetstringString:
    result = $( node.str.len ) & ':' & node.str & ','
  of TNetstringInt:
    let str = $( node.num )
    result = $( str.len ) & ':' & str & '#'
  of TNetstringFloat:
    let str = $( node.fnum )
    result = $( str.len ) & ':' & str & '^'
  of TNetstringBool:
    result = if node.bval: "4:true!" else: "5:false!"
  of TNetstringNull:
    result = "0:~"
  of TNetstringArray:
    var body = ""
    for n in node.items:
      body.add( n.dump_tnetstring )
    result = $( body.len ) & ':' & body & ']'
  of TNetstringObject:
    var body = ""
    for key, val in node.pairs:
      body.add( $( key.len ) & ':' & key & ',' ) # key
      body.add( val.dump_tnetstring )            # val
    result = $( body.len ) & ':' & body & '}'


#
# Tests!
#
when isMainModule:

  proc failsToParse( data: string ): bool =
    ## Returns true when parsing `data` raises a TNetstringParseError.
    ## Used so a *missing* exception makes the test fail instead of
    ## passing silently.
    try:
      discard parse_tnetstring( data )
    except TNetstringParseError:
      result = true

  # Expected exceptions
  #
  doAssert( failsToParse( "totally invalid" ), "invalid tnetstring" )
  doAssert( failsToParse( "what:ever" ), "bad length" )
  doAssert( failsToParse( "3:yep~" ), "null w/ > 0 length" )
  doAssert( failsToParse( "8:1:1#1:1#}" ), "hash with non-string key" )
  doAssert( failsToParse( "7:4:test,}" ), "hash with odd number of elements" )
  doAssert( failsToParse( "2:25*" ), "unknown netstring tag" )

  # Equality
  #
  let tnet_int = parse_tnetstring( "1:1#" )
  doAssert( tnet_int == tnet_int )
  doAssert( tnet_int == parse_tnetstring( "1:1#" ) )
  doAssert( parse_tnetstring( "0:~" ) == parse_tnetstring( "0:~" ) )

  # Type detection
  #
  doAssert( tnet_int.kind == TNetstringInt )
  doAssert( parse_tnetstring( "1:a," ).kind == TNetstringString )
  doAssert( parse_tnetstring( "3:1.0^" ).kind == TNetstringFloat )
  doAssert( parse_tnetstring( "5:false!" ).kind == TNetstringBool )
  doAssert( parse_tnetstring( "0:~" ).kind == TNetstringNull )
  doAssert( parse_tnetstring( "9:2:hi,1:1#}" ).kind == TNetstringObject )
  doAssert( parse_tnetstring( "8:1:1#1:2#]" ).kind == TNetstringArray )

  # Iteration (both array and tuple)
  #
  var
    keys: array[ 2, string ]
    vals: array[ 4, string ]
    k_idx = 0
    idx = 0
  for key, val in parse_tnetstring( "35:2:hi,8:1:a,1:b,]5:there,8:1:c,1:d,]}" ):
    keys[ idx ] = key
    idx = idx + 1
    for item in val:
      vals[ k_idx ] = item.str
      k_idx = k_idx + 1
  doAssert( keys == ["hi","there"] )
  doAssert( vals == ["a","b","c","d"] )

  # Deep copies
  #
  var original = parse_tnetstring( "35:2:hi,8:1:a,1:b,]5:there,8:1:c,1:d,]}" )
  var copied = original.copy
  doAssert( original == copied )
  doAssert( original.repr != copied.repr )
  doAssert( original.fields.pop.val.elems.pop.repr != copied.fields.pop.val.elems.pop.repr )

  # Key deletion
  #
  var tnet_obj = parse_tnetstring( "35:2:hi,8:1:a,1:b,]5:there,8:1:c,1:d,]}" )
  tnet_obj.delete( "hi" )
  doAssert( tnet_obj.fields.len == 1 )

  # Hashing
  #
  doAssert( tnet_int.hash == 1.hash )
  doAssert( parse_tnetstring( "4:true!" ).hash == hash( true.int ) )

  # Length checks.
  #
  tnet_obj = parse_tnetstring( "35:2:hi,8:1:a,1:b,]5:there,8:1:c,1:d,]}" )
  doAssert( parse_tnetstring( "0:~" ).len == 0 )
  doAssert( tnet_obj.len == 2 )
  doAssert( parse_tnetstring( "8:1:1#1:2#]" ).len == 2 )
  doAssert( parse_tnetstring( "5:hallo," ).len == 5 )

  # Index accessors
  #
  tnet_obj = parse_tnetstring( "20:1:1#1:2#1:3#1:4#1:5#]" )
  doAssert( tnet_obj[ 2 ].num == 3 )

  # Key accessors
  #
  tnet_obj = parse_tnetstring( "11:2:hi,3:yep,}" )
  doAssert( $tnet_obj["hi"] == "yep" )
  doAssert( tnet_obj.has_key( "hi" ) == true )
  doAssert( tnet_obj.has_key( "nope-not-here" ) == false )

  # Adding elements to an existing TNetstring array
  #
  var tnet_array = newTNetstringArray()
  for i in 1 .. 10:
    tnet_obj = newTNetstringInt( i )
    tnet_array.add( tnet_obj )
  tnet_array[ 6 ] = newTNetstringString( "yep" )
  doAssert( tnet_array.len == 10 )
  doAssert( tnet_array[ 4 ].num == 5 )
  doAssert( tnet_array[ 6 ].str == "yep" )

  # Adding pairs to an existing TNetstring object.
  #
  tnet_obj = newTNetstringObject()
  tnet_obj.add( "yo", newTNetstringInt(1) )
  tnet_obj.add( "yep", newTNetstringInt(2) )
  doAssert( tnet_obj["yo"].num == 1 )
  doAssert( tnet_obj["yep"].num == 2 )
  doAssert( tnet_obj.len == 2 )
  tnet_obj[ "more" ] = newTNetstringInt(1)
  tnet_obj[ "yo" ] = newTNetstringInt(1) # dup check
  doAssert( tnet_obj.len == 3 )

  # Serialization.
  #
  var tstr = "308:9:givenName,6:Mahlon,16:departmentNumber,22:Information Technology," &
    "5:title,19:Senior Technologist,13:accountConfig,48:7:vmemail,4:true!7:allpage," &
    "5:false!7:galhide,0:~}13:homeDirectory,14:/home/m/mahlon,3:uid,6:mahlon,9:yubi" &
    "KeyId,12:vvidhghkhehj,5:gecos,12:Mahlon Smith,2:sn,5:Smith,14:employeeNumber,5:12921#}"
  tnet_obj = parse_tnetstring( tstr )
  doAssert( tstr == tnet_obj.dump_tnetstring )

  echo "* Tests passed!"


  # Interactive mode (only when compiled with --define:testing): parse each
  # line read from stdin and echo the result.  The boolean form of readLine
  # is used so that end of input ends the loop instead of raising.
  when defined( testing ):
    var line = ""
    while readLine( stdin, line ):
      let input = line.strip
      try:
        var node = parse_tnetstring( input )
        echo " parsed --> ", node
        echo " serialized --> ", node.dump_tnetstring, "\n"
      except TNetstringParseError:
        echo input, " --> ", getCurrentExceptionMsg()