Migrate hashing functions to C.
This commit is contained in:
parent
7339802f34
commit
c99bdfe747
10 changed files with 287 additions and 39 deletions
|
|
@ -1,4 +1,4 @@
|
|||
Session.vim
|
||||
pkg/*
|
||||
docs/*
|
||||
|
||||
tmp/*
|
||||
|
|
|
|||
14
README.md
14
README.md
|
|
@ -25,7 +25,7 @@ This was tested against ezmlm-idx 7.2.2.
|
|||
*Strong recommendation*: Create your lists with archiving (-a) and
|
||||
indexing (-i)! This library is suitable for modifying behavior of
|
||||
existing lists as a default, but with these flags enabled, can also
|
||||
be an interface to parsing and browsing the content of lists.
|
||||
be a generic interface for parsing and browsing list content.
|
||||
|
||||
|
||||
## Prerequisites
|
||||
|
|
@ -67,6 +67,18 @@ page for ezmlm-reject.) Patches are welcome if you'd like these sorts
|
|||
of miscellaneous things included.
|
||||
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
Portions of this library are copied from ezmlm-idx source, authored by
|
||||
the following:
|
||||
|
||||
* D. J. Bernstein <djb@cr.yp.to>
|
||||
* Bruce Guenter <bruce@untroubled.org>
|
||||
|
||||
Many thanks for Dan and Bruce for their commitment for fine software, and
|
||||
contributions to the internet communities over the years.
|
||||
|
||||
|
||||
## License
|
||||
|
||||
Copyright (c) 2017, Mahlon E. Smith <mahlon@martini.nu>
|
||||
|
|
|
|||
18
Rakefile
18
Rakefile
|
|
@ -3,6 +3,12 @@
|
|||
|
||||
require 'pathname'
|
||||
|
||||
begin
|
||||
require 'rake/extensiontask'
|
||||
rescue LoadError
|
||||
abort "This Rakefile requires rake-compiler (gem install rake-compiler)"
|
||||
end
|
||||
|
||||
PROJECT = 'ezmlm'
|
||||
BASEDIR = Pathname.new( __FILE__ ).expand_path.dirname.relative_path_from( Pathname.getwd )
|
||||
LIBDIR = BASEDIR + 'lib'
|
||||
|
|
@ -49,6 +55,7 @@ manager for use with the Qmail MTA, and the messages contained therein.
|
|||
(The -idx provides an extended feature set over the original ezmlm
|
||||
environment.)
|
||||
EOF
|
||||
s.extensions = FileList[ "ext/**/extconf.rb" ]
|
||||
s.required_ruby_version = '>= 2.1'
|
||||
|
||||
s.add_dependency 'mail', "~> 2.6"
|
||||
|
|
@ -60,6 +67,9 @@ Gem::PackageTask.new( spec ) do |pkg|
|
|||
pkg.need_tar = true
|
||||
end
|
||||
|
||||
# Build the C extension for hashing addresses.
|
||||
Rake::ExtensionTask.new( 'ezmlm/hash', spec )
|
||||
|
||||
|
||||
########################################################################
|
||||
### D O C U M E N T A T I O N
|
||||
|
|
@ -74,7 +84,7 @@ begin
|
|||
rdoc.rdoc_dir = 'docs'
|
||||
rdoc.main = "README.rdoc"
|
||||
# rdoc.options = [ '-f', 'fivefish' ]
|
||||
rdoc.rdoc_files = [ 'lib', *FileList['*.rdoc'] ]
|
||||
rdoc.rdoc_files = [ 'lib', *FileList['ext/*/*.c'], *FileList['*.rdoc'] ]
|
||||
end
|
||||
|
||||
RDoc::Task.new do |rdoc|
|
||||
|
|
@ -116,6 +126,12 @@ end
|
|||
### M A N I F E S T
|
||||
########################################################################
|
||||
__END__
|
||||
ext/ezmlm/hash/hash.c
|
||||
ext/ezmlm/hash/hash.h
|
||||
ext/ezmlm/hash/extconf.rb
|
||||
lib/ezmlm/list.rb
|
||||
lib/ezmlm/list/message.rb
|
||||
lib/ezmlm/list/thread.rb
|
||||
lib/ezmlm/list/author.rb
|
||||
lib/ezmlm.rb
|
||||
|
||||
|
|
|
|||
BIN
experiments/surf.pdf
Normal file
BIN
experiments/surf.pdf
Normal file
Binary file not shown.
5
ext/ezmlm/hash/extconf.rb
Normal file
5
ext/ezmlm/hash/extconf.rb
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
|
||||
require 'mkmf'
|
||||
|
||||
create_makefile( 'ezmlm/hash' )
|
||||
|
||||
193
ext/ezmlm/hash/hash.c
Normal file
193
ext/ezmlm/hash/hash.c
Normal file
|
|
@ -0,0 +1,193 @@
|
|||
|
||||
#include "hash.h"
|
||||
|
||||
/*
|
||||
* I originally attemped to just convert surf.c to pure Ruby, but I
|
||||
* confess a lack of understanding surrounding the char casts from
|
||||
* unsigned ints, etc, and screwing up a hash algo doesn't do anyone
|
||||
* any good, least of all, me. In other words, I don't have to fully
|
||||
* understand DJB code to trust in it. :-)
|
||||
*
|
||||
* The following is copied verbatim from the ezmlm-idx source, version
|
||||
* 7.2.2. See: subhash.c, surf.c, and surfpcs.c.
|
||||
*
|
||||
*/
|
||||
|
||||
void surf(unsigned int out[8],const unsigned int in[12],const unsigned int seed[32])
|
||||
{
|
||||
unsigned int t[12]; unsigned int x; unsigned int sum = 0;
|
||||
int r; int i; int loop;
|
||||
|
||||
for (i = 0;i < 12;++i) t[i] = in[i] ^ seed[12 + i];
|
||||
for (i = 0;i < 8;++i) out[i] = seed[24 + i];
|
||||
x = t[11];
|
||||
for (loop = 0;loop < 2;++loop) {
|
||||
for (r = 0;r < 16;++r) {
|
||||
sum += 0x9e3779b9;
|
||||
MUSH(0,5) MUSH(1,7) MUSH(2,9) MUSH(3,13)
|
||||
MUSH(4,5) MUSH(5,7) MUSH(6,9) MUSH(7,13)
|
||||
MUSH(8,5) MUSH(9,7) MUSH(10,9) MUSH(11,13)
|
||||
}
|
||||
for (i = 0;i < 8;++i) out[i] ^= t[i + 4];
|
||||
}
|
||||
}
|
||||
|
||||
void surfpcs_init(surfpcs *s,const unsigned int k[32])
|
||||
{
|
||||
int i;
|
||||
for (i = 0;i < 32;++i) s->seed[i] = k[i];
|
||||
for (i = 0;i < 8;++i) s->sum[i] = 0;
|
||||
for (i = 0;i < 12;++i) s->in[i] = 0;
|
||||
s->todo = 0;
|
||||
}
|
||||
|
||||
void surfpcs_add(surfpcs *s,const char *x,unsigned int n)
|
||||
{
|
||||
int i;
|
||||
while (n--) {
|
||||
data[end[s->todo++]] = *x++;
|
||||
if (s->todo == 32) {
|
||||
s->todo = 0;
|
||||
if (!++s->in[8])
|
||||
if (!++s->in[9])
|
||||
if (!++s->in[10])
|
||||
++s->in[11];
|
||||
surf(s->out,s->in,s->seed);
|
||||
for (i = 0;i < 8;++i)
|
||||
s->sum[i] += s->out[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void surfpcs_addlc(surfpcs *s,const char *x,unsigned int n)
|
||||
/* modified from surfpcs_add by case-independence and skipping ' ' & '\t' */
|
||||
{
|
||||
unsigned char ch;
|
||||
int i;
|
||||
while (n--) {
|
||||
ch = *x++;
|
||||
if (ch == ' ' || ch == '\t') continue;
|
||||
if (ch >= 'A' && ch <= 'Z')
|
||||
ch -= 'a' - 'A';
|
||||
|
||||
data[end[s->todo++]] = ch;
|
||||
if (s->todo == 32) {
|
||||
s->todo = 0;
|
||||
if (!++s->in[8])
|
||||
if (!++s->in[9])
|
||||
if (!++s->in[10])
|
||||
++s->in[11];
|
||||
surf(s->out,s->in,s->seed);
|
||||
for (i = 0;i < 8;++i)
|
||||
s->sum[i] += s->out[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void surfpcs_out(surfpcs *s,unsigned char h[32])
|
||||
{
|
||||
int i;
|
||||
surfpcs_add(s,".",1);
|
||||
while (s->todo) surfpcs_add(s,"",1);
|
||||
for (i = 0;i < 8;++i) s->in[i] = s->sum[i];
|
||||
for (;i < 12;++i) s->in[i] = 0;
|
||||
surf(s->out,s->in,s->seed);
|
||||
for (i = 0;i < 32;++i) h[i] = outdata[end[i]];
|
||||
}
|
||||
|
||||
void makehash(const char *indata,unsigned int inlen,char *hash)
|
||||
/* makes hash[COOKIE=20] from stralloc *indata, ignoring case and */
|
||||
/* SPACE/TAB */
|
||||
{
|
||||
unsigned char h[32];
|
||||
surfpcs s;
|
||||
unsigned int seed[32];
|
||||
int i;
|
||||
|
||||
for (i = 0;i < 32;++i) seed[i] = 0;
|
||||
surfpcs_init(&s,seed);
|
||||
surfpcs_addlc(&s,indata,inlen);
|
||||
surfpcs_out(&s,h);
|
||||
for (i = 0;i < 20;++i)
|
||||
hash[i] = 'a' + (h[i] & 15);
|
||||
}
|
||||
|
||||
unsigned int subhashb(const char *s,long len)
|
||||
{
|
||||
unsigned long h;
|
||||
h = 5381;
|
||||
while (len-- > 0)
|
||||
h = (h + (h << 5)) ^ (unsigned int)*s++;
|
||||
return h % 53;
|
||||
}
|
||||
|
||||
unsigned int subhashs(const char *s)
|
||||
{
|
||||
return subhashb(s,strlen(s));
|
||||
}
|
||||
|
||||
/* end copy of ezmlm-idx source */
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* callseq:
|
||||
* Ezmlm::Hash.address( email ) -> String
|
||||
*
|
||||
* Call the Surf hashing function on an +email+ address, returning
|
||||
* the hashed string. This is specific to how ezmlm is seeding
|
||||
* the hash, and parsing email addresses from messages (prefixed with
|
||||
* the '<' character.)
|
||||
*
|
||||
*/
|
||||
VALUE
|
||||
address( VALUE klass, VALUE email ) {
|
||||
char hash[20];
|
||||
char *input;
|
||||
|
||||
Check_Type( email, T_STRING );
|
||||
|
||||
email = rb_str_plus( rb_str_new2("<"), email);
|
||||
input = StringValueCStr( email );
|
||||
|
||||
makehash( input, strlen(input), hash );
|
||||
|
||||
return rb_str_new2( hash );
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* callseq:
|
||||
* Ezmlm::Hash.subscriber( address ) -> String
|
||||
*
|
||||
* Call the subscriber hashing function on an email +address+, returning
|
||||
* the index character referring to the file containing subscriber presence.
|
||||
*
|
||||
*/
|
||||
VALUE
|
||||
subscriber( VALUE klass, VALUE email ) {
|
||||
unsigned int prefix;
|
||||
|
||||
Check_Type( email, T_STRING );
|
||||
|
||||
email = rb_str_plus( rb_str_new2("T"), email);
|
||||
prefix = subhashs( StringValueCStr(email) ) + 64;
|
||||
|
||||
return rb_sprintf( "%c", (char)prefix );
|
||||
}
|
||||
|
||||
|
||||
|
||||
void
|
||||
Init_hash()
|
||||
{
|
||||
rb_mEzmlm = rb_define_module( "Ezmlm" );
|
||||
rb_cEZHash = rb_define_class_under( rb_mEzmlm, "Hash", rb_cObject );
|
||||
|
||||
rb_define_module_function( rb_cEZHash, "address", address, 1 );
|
||||
rb_define_module_function( rb_cEZHash, "subscriber", subscriber, 1 );
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
46
ext/ezmlm/hash/hash.h
Normal file
46
ext/ezmlm/hash/hash.h
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
|
||||
#include <ruby.h>
|
||||
|
||||
static VALUE rb_mEzmlm;
|
||||
static VALUE rb_cEZHash;
|
||||
|
||||
|
||||
#ifndef SURF_H
|
||||
#define SURF_H
|
||||
|
||||
#define ROTATE(x,b) (((x) << (b)) | ((x) >> (32 - (b))))
|
||||
#define MUSH(i,b) x = t[i] += (((x ^ seed[i]) + sum) ^ ROTATE(x,b));
|
||||
|
||||
typedef struct {
|
||||
unsigned int seed[32];
|
||||
unsigned int sum[8];
|
||||
unsigned int out[8];
|
||||
unsigned int in[12];
|
||||
int todo;
|
||||
} surfpcs;
|
||||
|
||||
static const unsigned int littleendian[8] = {
|
||||
0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c,
|
||||
0x13121110, 0x17161514, 0x1b1a1918, 0x1f1e1d1c
|
||||
} ;
|
||||
#define end ((unsigned char *) &littleendian)
|
||||
#define data ((unsigned char *) s->in)
|
||||
#define outdata ((unsigned char *) s->out)
|
||||
|
||||
extern void surf( unsigned int out[8], const unsigned int in[12], const unsigned int seed[32] );
|
||||
extern void surfpcs_init( surfpcs *s, const unsigned int k[32] );
|
||||
extern void surfpcs_add( surfpcs *s, const char *x,unsigned int n );
|
||||
extern void surfpcs_addlc( surfpcs *s, const char *x,unsigned int n );
|
||||
extern void surfpcs_out( surfpcs *s, unsigned char h[32] );
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef SUBHASH_H
|
||||
#define SUBHASH_H
|
||||
|
||||
unsigned int subhashs(const char *s);
|
||||
unsigned int subhashb(const char *s,long len);
|
||||
#define subhashsa(SA) subhashb((SA)->s,(SA)->len)
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -23,6 +23,7 @@ module Ezmlm
|
|||
|
||||
# Suck in the components.
|
||||
#
|
||||
require 'ezmlm/hash'
|
||||
require 'ezmlm/list'
|
||||
require 'ezmlm/list/author'
|
||||
require 'ezmlm/list/message'
|
||||
|
|
|
|||
|
|
@ -19,10 +19,6 @@ require 'ezmlm' unless defined?( Ezmlm )
|
|||
###
|
||||
class Ezmlm::List
|
||||
|
||||
# Quick address space detection, to (hopefully)
|
||||
# match the overflow size on this machine.
|
||||
ADDRESS_SPACE = [ 'i' ].pack( 'p' ).size * 8
|
||||
|
||||
# Valid subdirectories/sections for subscriptions.
|
||||
SUBSCRIPTION_DIRS = %w[ deny mod digest allow ]
|
||||
|
||||
|
|
@ -75,7 +71,7 @@ class Ezmlm::List
|
|||
###
|
||||
def include?( addr, section: nil )
|
||||
addr.downcase!
|
||||
file = self.subscription_dir( section ) + self.hashchar( addr )
|
||||
file = self.subscription_dir( section ) + Ezmlm::Hash.subscriber( addr )
|
||||
return false unless file.exist?
|
||||
return file.read.scan( /T([^\0]+)\0/ ).flatten.include?( addr )
|
||||
end
|
||||
|
|
@ -97,7 +93,7 @@ class Ezmlm::List
|
|||
next unless address.index( '@' )
|
||||
address.downcase!
|
||||
|
||||
file = self.subscription_dir( section ) + self.hashchar( address )
|
||||
file = self.subscription_dir( section ) + Ezmlm::Hash.subscriber( address )
|
||||
self.with_safety do
|
||||
if file.exist?
|
||||
addresses = file.read.scan( /T([^\0]+)\0/ ).flatten
|
||||
|
|
@ -123,7 +119,7 @@ class Ezmlm::List
|
|||
addr.each do |address|
|
||||
address.downcase!
|
||||
|
||||
file = self.subscription_dir( section ) + self.hashchar( address )
|
||||
file = self.subscription_dir( section ) + Ezmlm::Hash.subscriber( address )
|
||||
self.with_safety do
|
||||
next unless file.exist?
|
||||
addresses = file.read.scan( /T([^\0]+)\0/ ).flatten
|
||||
|
|
@ -680,10 +676,12 @@ class Ezmlm::List
|
|||
end
|
||||
|
||||
|
||||
### Return an Author object for the given +author_id+.
|
||||
### Return an Author object for the given +author_id+, which
|
||||
### could also be an email address.
|
||||
###
|
||||
def author( author_id )
|
||||
raise "Archiving is not enabled." unless self.archived?
|
||||
author_id = Ezmlm::Hash.address(author_id) if author_id.index( '@' )
|
||||
return Ezmlm::List::Author.new( self, author_id )
|
||||
end
|
||||
|
||||
|
|
@ -729,34 +727,6 @@ class Ezmlm::List
|
|||
protected
|
||||
#########
|
||||
|
||||
### Hash an email address, using the ezmlm algorithm for
|
||||
### fast user lookups. Returns the hashed integer.
|
||||
###
|
||||
### Older ezmlm didn't lowercase addresses, anything within the last
|
||||
### decade did. We're not going to worry about compatibility there.
|
||||
###
|
||||
### See: subhash.c in the ezmlm source.
|
||||
###
|
||||
def subhash( addr )
|
||||
h = 5381
|
||||
over = 2 ** ADDRESS_SPACE
|
||||
|
||||
addr = 'T' + addr.downcase
|
||||
addr.each_char do |c|
|
||||
h = ( h + ( h << 5 ) ) ^ c.ord
|
||||
h = h % over if h > over # emulate integer overflow
|
||||
end
|
||||
return h % 53
|
||||
end
|
||||
|
||||
|
||||
### Given an email address, return the ascii hash prefix.
|
||||
###
|
||||
def hashchar( addr )
|
||||
return ( self.subhash(addr) + 64 ).chr
|
||||
end
|
||||
|
||||
|
||||
### Just return the contents of the provided +file+, rooted
|
||||
### in the list directory.
|
||||
###
|
||||
|
|
|
|||
|
|
@ -341,6 +341,11 @@ describe Ezmlm::List do
|
|||
expect( list.author('ojjhjlapnejjlbcplabi') ).to be_a( Ezmlm::List::Author )
|
||||
end
|
||||
|
||||
it 'fetches author objects by email address' do
|
||||
author = list.author( 'ojjhjlapnejjlbcplabi' )
|
||||
expect( list.author('yvette@example.net').name ).to eq( author.name )
|
||||
end
|
||||
|
||||
|
||||
context 'fetching messages' do
|
||||
it 'raises an error if archiving is disabled' do
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue