/* Module that wraps all OpenSSL hash algorithms */

/*
 * Copyright (C) 2005-2010   Gregory P. Smith (greg@krypto.org)
 * Licensed to PSF under a Contributor Agreement.
 *
 * Derived from a skeleton of shamodule.c containing work performed by:
 *
 * Andrew Kuchling (amk@amk.ca)
 * Greg Stein (gstein@lyra.org)
 *
 */

Thomas Wouters's avatar
Thomas Wouters committed
14 15
#define PY_SSIZE_T_CLEAN

16 17
#include "Python.h"
#include "structmember.h"
18
#include "hashlib.h"
19

20
#ifdef WITH_THREAD
#include "pythread.h"
    /* Acquire the per-object lock, if one has been allocated.
     * A non-blocking attempt is made first; only when that fails are other
     * Python threads allowed to run while this one blocks on the lock.
     * Wrapped in do { } while (0) so the macro expands to exactly one
     * statement and stays correct as the body of an unbraced if/else. */
    #define ENTER_HASHLIB(obj) \
        do { \
            if ((obj)->lock) { \
                if (!PyThread_acquire_lock((obj)->lock, 0)) { \
                    Py_BEGIN_ALLOW_THREADS \
                    PyThread_acquire_lock((obj)->lock, 1); \
                    Py_END_ALLOW_THREADS \
                } \
            } \
        } while (0)
    /* Release the per-object lock, if one has been allocated. */
    #define LEAVE_HASHLIB(obj) \
        do { \
            if ((obj)->lock) { \
                PyThread_release_lock((obj)->lock); \
            } \
        } while (0)
#else
    /* Built without thread support: both macros are no-op statements. */
    #define ENTER_HASHLIB(obj) do { } while (0)
    #define LEAVE_HASHLIB(obj) do { } while (0)
#endif

39 40 41 42 43 44 45 46 47 48 49 50 51 52
/* EVP is the preferred interface to hashing in OpenSSL */
#include <openssl/evp.h>

/* Upper bound on the number of bytes passed to one EVP_DigestUpdate() call;
 * EVP_hash() feeds longer inputs in pieces of at most this size. */
#define MUNCH_SIZE INT_MAX

/* Size threshold in bytes: inputs at least this large are hashed inside a
 * Py_BEGIN_ALLOW_THREADS section, and update() allocates the object lock
 * the first time it sees one.
 * TODO(gps): We should probably make this a module or EVPobject attribute
 * to allow the user to optimize based on the platform they're using. */
#define HASHLIB_GIL_MINSIZE 2048

/* When nonzero, the HASH type gets a tp_init (EVP_tp_init) and is added to
 * the module as "HASH".  Defaults to 0 unless defined by the build. */
#ifndef HASH_OBJ_CONSTRUCTOR
#define HASH_OBJ_CONSTRUCTOR 0
#endif


53 54 55
/* Instance layout of a _hashlib.HASH object. */
typedef struct {
    PyObject_HEAD
    /* Name of this hash algorithm; exposed as the read-only "name" member. */
    PyObject *name;
    /* OpenSSL message digest context carrying the running hash state. */
    EVP_MD_CTX ctx;
#ifdef WITH_THREAD
    /* OpenSSL context lock; NULL until update() allocates it. */
    PyThread_type_lock lock;
#endif
} EVPobject;


static PyTypeObject EVPtype;


/* Declares the per-algorithm statics used by the dedicated constructors:
 *   CONST_<Name>_name_obj   - Python object holding the algorithm name
 *   CONST_new_<Name>_ctx    - storage for a pre-initialized digest context
 *   CONST_new_<Name>_ctx_p  - pointer to that context; stays NULL unless
 *                             module init finds the digest in OpenSSL */
#define DEFINE_CONSTS_FOR_NEW(Name)  \
    static PyObject *CONST_ ## Name ## _name_obj; \
    static EVP_MD_CTX CONST_new_ ## Name ## _ctx; \
    static EVP_MD_CTX *CONST_new_ ## Name ## _ctx_p = NULL;

71 72 73 74 75 76
/* One set of statics per algorithm that has an openssl_<name> constructor. */
DEFINE_CONSTS_FOR_NEW(md5)
DEFINE_CONSTS_FOR_NEW(sha1)
DEFINE_CONSTS_FOR_NEW(sha224)
DEFINE_CONSTS_FOR_NEW(sha256)
DEFINE_CONSTS_FOR_NEW(sha384)
DEFINE_CONSTS_FOR_NEW(sha512)
77 78 79 80 81 82 83 84 85 86 87


/* Allocate a HASH object that remembers `name` as its algorithm name.
 *
 * Takes its own reference to `name`.  The digest context is not touched
 * here; callers set it up afterwards.  With thread support the lock starts
 * out as NULL.  Returns NULL when the allocation fails. */
static EVPobject *
newEVPobject(PyObject *name)
{
    EVPobject *obj = (EVPobject *)PyObject_New(EVPobject, &EVPtype);

    if (obj == NULL)
        return NULL;

    /* keep the name around so .name can return it */
    obj->name = name;
    Py_INCREF(name);
#ifdef WITH_THREAD
    obj->lock = NULL;
#endif

    return obj;
}

96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111
/* Feed `len` bytes starting at `vp` into self's digest context.
 *
 * The input is handed to EVP_DigestUpdate() in pieces of at most MUNCH_SIZE
 * bytes, because that call receives the piece length as an unsigned int
 * while `len` is a Py_ssize_t.  Does nothing when len <= 0. */
static void
EVP_hash(EVPobject *self, const void *vp, Py_ssize_t len)
{
    const unsigned char *cursor = (const unsigned char *)vp;
    Py_ssize_t remaining = len;

    while (remaining > 0) {
        unsigned int chunk;

        if (remaining <= (Py_ssize_t)MUNCH_SIZE)
            chunk = Py_SAFE_DOWNCAST(remaining, Py_ssize_t, unsigned int);
        else
            chunk = MUNCH_SIZE;

        EVP_DigestUpdate(&self->ctx, (const void *)cursor, chunk);
        cursor += chunk;
        remaining -= chunk;
    }
}

112 113 114
/* Internal methods for a hash object */

static void
115
EVP_dealloc(EVPobject *self)
116
{
117 118 119 120 121 122 123
#ifdef WITH_THREAD
    if (self->lock != NULL)
        PyThread_free_lock(self->lock);
#endif
    EVP_MD_CTX_cleanup(&self->ctx);
    Py_XDECREF(self->name);
    PyObject_Del(self);
124 125
}

126 127 128 129 130 131
/* Copy self's digest context into *new_ctx_p, holding self's lock for the
 * duration of the copy when a lock has been allocated. */
static void
locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self)
{
    ENTER_HASHLIB(self);
    EVP_MD_CTX_copy(new_ctx_p, &self->ctx);
    LEAVE_HASHLIB(self);
}
132 133 134 135 136

/* External methods for a hash object */

PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object.");

137

138
static PyObject *
139
EVP_copy(EVPobject *self, PyObject *unused)
140 141 142 143 144 145
{
    EVPobject *newobj;

    if ( (newobj = newEVPobject(self->name))==NULL)
        return NULL;

146
    locked_EVP_MD_CTX_copy(&newobj->ctx, self);
147 148 149 150 151 152 153
    return (PyObject *)newobj;
}

PyDoc_STRVAR(EVP_digest__doc__,
"Return the digest value as a string of binary data.");

static PyObject *
154
EVP_digest(EVPobject *self, PyObject *unused)
155 156 157 158 159 160
{
    unsigned char digest[EVP_MAX_MD_SIZE];
    EVP_MD_CTX temp_ctx;
    PyObject *retval;
    unsigned int digest_size;

161
    locked_EVP_MD_CTX_copy(&temp_ctx, self);
162
    digest_size = EVP_MD_CTX_size(&temp_ctx);
163
    EVP_DigestFinal(&temp_ctx, digest, NULL);
164

165
    retval = PyBytes_FromStringAndSize((const char *)digest, digest_size);
166 167 168 169 170 171 172 173
    EVP_MD_CTX_cleanup(&temp_ctx);
    return retval;
}

PyDoc_STRVAR(EVP_hexdigest__doc__,
"Return the digest value as a string of hexadecimal digits.");

static PyObject *
174
EVP_hexdigest(EVPobject *self, PyObject *unused)
175 176 177 178 179 180 181 182
{
    unsigned char digest[EVP_MAX_MD_SIZE];
    EVP_MD_CTX temp_ctx;
    PyObject *retval;
    char *hex_digest;
    unsigned int i, j, digest_size;

    /* Get the raw (binary) digest value */
183
    locked_EVP_MD_CTX_copy(&temp_ctx, self);
184 185 186 187 188
    digest_size = EVP_MD_CTX_size(&temp_ctx);
    EVP_DigestFinal(&temp_ctx, digest, NULL);

    EVP_MD_CTX_cleanup(&temp_ctx);

189 190 191
    /* Allocate a new buffer */
    hex_digest = PyMem_Malloc(digest_size * 2 + 1);
    if (!hex_digest)
192
        return PyErr_NoMemory();
193 194 195 196 197

    /* Make hex version of the digest */
    for(i=j=0; i<digest_size; i++) {
        char c;
        c = (digest[i] >> 4) & 0xf;
198
        c = (c>9) ? c+'a'-10 : c + '0';
199 200
        hex_digest[j++] = c;
        c = (digest[i] & 0xf);
201
        c = (c>9) ? c+'a'-10 : c + '0';
202 203
        hex_digest[j++] = c;
    }
204 205
    retval = PyUnicode_FromStringAndSize(hex_digest, digest_size * 2);
    PyMem_Free(hex_digest);
206 207 208 209 210 211 212 213 214
    return retval;
}

PyDoc_STRVAR(EVP_update__doc__,
"Update this hash object's state with the provided string.");

/* HASH.update(data): hash the contents of `data` into this object.
 *
 * With thread support, the first input of at least HASHLIB_GIL_MINSIZE
 * bytes makes the object allocate its lock; once a lock exists, hashing is
 * done under that lock with other Python threads allowed to run.
 * Returns None, or NULL with an exception set. */
static PyObject *
EVP_update(EVPobject *self, PyObject *args)
{
    PyObject *data;
    Py_buffer buf;

    if (!PyArg_ParseTuple(args, "O:update", &data))
        return NULL;

    GET_BUFFER_VIEW_OR_ERROUT(data, &buf);

#ifdef WITH_THREAD
    if (self->lock == NULL && buf.len >= HASHLIB_GIL_MINSIZE) {
        self->lock = PyThread_allocate_lock();
        /* fail? lock = NULL and we fail over to non-threaded code. */
    }

    if (self->lock == NULL) {
        EVP_hash(self, buf.buf, buf.len);
    }
    else {
        Py_BEGIN_ALLOW_THREADS
        PyThread_acquire_lock(self->lock, 1);
        EVP_hash(self, buf.buf, buf.len);
        PyThread_release_lock(self->lock);
        Py_END_ALLOW_THREADS
    }
#else
    EVP_hash(self, buf.buf, buf.len);
#endif

    PyBuffer_Release(&buf);
    Py_RETURN_NONE;
}

static PyMethodDef EVP_methods[] = {
247 248
    {"update",    (PyCFunction)EVP_update,    METH_VARARGS, EVP_update__doc__},
    {"digest",    (PyCFunction)EVP_digest,    METH_NOARGS,  EVP_digest__doc__},
249
    {"hexdigest", (PyCFunction)EVP_hexdigest, METH_NOARGS,  EVP_hexdigest__doc__},
250 251
    {"copy",      (PyCFunction)EVP_copy,      METH_NOARGS,  EVP_copy__doc__},
    {NULL, NULL}  /* sentinel */
252 253 254 255 256
};

/* Getter for HASH.block_size: the block size reported by OpenSSL for this
 * object's digest context, as a Python int. */
static PyObject *
EVP_get_block_size(EVPobject *self, void *closure)
{
    return PyLong_FromLong(EVP_MD_CTX_block_size(&self->ctx));
}

/* Getter for HASH.digest_size: the digest size reported by OpenSSL for this
 * object's digest context, as a Python int. */
static PyObject *
EVP_get_digest_size(EVPobject *self, void *closure)
{
    return PyLong_FromLong(EVP_MD_CTX_size(&self->ctx));
}

/* Struct members exposed directly as attributes of HASH objects:
 * "name" is a read-only view of EVPobject.name. */
static PyMemberDef EVP_members[] = {
    {"name", T_OBJECT, offsetof(EVPobject, name), READONLY, PyDoc_STR("algorithm name.")},
    {NULL}  /* Sentinel */
};

/* Computed attributes of HASH objects.  Both entries have a getter only
 * (no setter), no docstring and no closure. */
static PyGetSetDef EVP_getseters[] = {
    {"digest_size",
     (getter)EVP_get_digest_size, NULL,
     NULL,
     NULL},
    {"block_size",
     (getter)EVP_get_block_size, NULL,
     NULL,
     NULL},
    {NULL}  /* Sentinel */
};


static PyObject *
289
EVP_repr(EVPobject *self)
290
{
291
    return PyUnicode_FromFormat("<%U HASH object @ %p>", self->name, self);
292 293 294 295 296 297
}

#if HASH_OBJ_CONSTRUCTOR
/* tp_init for HASH(name, string=None); compiled only when
 * HASH_OBJ_CONSTRUCTOR is nonzero.
 *
 * Looks up the digest called `name`, initializes the context with it and,
 * when `string` is given, hashes its contents (inside a
 * Py_BEGIN_ALLOW_THREADS section for inputs of at least
 * HASHLIB_GIL_MINSIZE bytes).
 * Returns 0 on success and -1 with an exception set on failure.
 *
 * NOTE(review): GET_BUFFER_VIEW_OR_ERROUT comes from hashlib.h, which is not
 * visible here.  If it bails out with `return NULL`, that reads as 0
 * (success) in this int-returning function -- confirm, and switch to an
 * explicit -1 return if so. */
static int
EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"name", "string", NULL};
    PyObject *name_obj = NULL;
    PyObject *data_obj = NULL;
    PyObject *old_name;
    Py_buffer view;
    char *nameStr;
    const EVP_MD *digest;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:HASH", kwlist,
                                     &name_obj, &data_obj)) {
        return -1;
    }

    /* `view` is only valid (and only released) when data_obj was given. */
    if (data_obj) {
        GET_BUFFER_VIEW_OR_ERROUT(data_obj, &view);
    }

    if (!PyArg_Parse(name_obj, "s", &nameStr)) {
        PyErr_SetString(PyExc_TypeError, "name must be a string");
        goto error;
    }

    digest = EVP_get_digestbyname(nameStr);
    if (!digest) {
        PyErr_SetString(PyExc_ValueError, "unknown hash function");
        goto error;
    }
    EVP_DigestInit(&self->ctx, digest);

    /* Install the new name before dropping the old one, so calling __init__
     * again does not leak the previous reference.
     * NOTE(review): assumes self->name is NULL or a valid reference on
     * entry -- confirm for the allocator used to create the instance. */
    old_name = self->name;
    Py_INCREF(name_obj);
    self->name = name_obj;
    Py_XDECREF(old_name);

    if (data_obj) {
        if (view.len >= HASHLIB_GIL_MINSIZE) {
            Py_BEGIN_ALLOW_THREADS
            EVP_hash(self, view.buf, view.len);
            Py_END_ALLOW_THREADS
        } else {
            EVP_hash(self, view.buf, view.len);
        }
        PyBuffer_Release(&view);
    }

    return 0;

error:
    if (data_obj)
        PyBuffer_Release(&view);
    return -1;
}
#endif


/* Class docstring of the HASH type (installed as tp_doc).  Lists every
 * method in EVP_methods and every attribute in EVP_members/EVP_getseters. */
PyDoc_STRVAR(hashtype_doc,
"A hash represents the object used to calculate a checksum of a\n\
string of information.\n\
\n\
Methods:\n\
\n\
update() -- updates the current digest with an additional string\n\
digest() -- return the current digest value\n\
hexdigest() -- return the current digest as a string of hexadecimal digits\n\
copy() -- return a copy of the current hash object\n\
\n\
Attributes:\n\
\n\
name -- the hash algorithm being used by this object\n\
digest_size -- number of bytes in this hash's output\n\
block_size -- internal block size of the hash algorithm in bytes\n");

static PyTypeObject EVPtype = {
365
    PyVarObject_HEAD_INIT(NULL, 0)
366
    "_hashlib.HASH",    /*tp_name*/
367 368
    sizeof(EVPobject),  /*tp_basicsize*/
    0,                  /*tp_itemsize*/
369
    /* methods */
370 371
    (destructor)EVP_dealloc, /*tp_dealloc*/
    0,                  /*tp_print*/
372 373
    0,                  /*tp_getattr*/
    0,                  /*tp_setattr*/
374
    0,                  /*tp_reserved*/
375
    (reprfunc)EVP_repr, /*tp_repr*/
376 377 378 379 380 381 382 383 384 385 386 387
    0,                  /*tp_as_number*/
    0,                  /*tp_as_sequence*/
    0,                  /*tp_as_mapping*/
    0,                  /*tp_hash*/
    0,                  /*tp_call*/
    0,                  /*tp_str*/
    0,                  /*tp_getattro*/
    0,                  /*tp_setattro*/
    0,                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
    hashtype_doc,       /*tp_doc*/
    0,                  /*tp_traverse*/
388 389 390 391 392 393 394
    0,                  /*tp_clear*/
    0,                  /*tp_richcompare*/
    0,                  /*tp_weaklistoffset*/
    0,                  /*tp_iter*/
    0,                  /*tp_iternext*/
    EVP_methods,        /* tp_methods */
    EVP_members,        /* tp_members */
395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410
    EVP_getseters,      /* tp_getset */
#if 1
    0,                  /* tp_base */
    0,                  /* tp_dict */
    0,                  /* tp_descr_get */
    0,                  /* tp_descr_set */
    0,                  /* tp_dictoffset */
#endif
#if HASH_OBJ_CONSTRUCTOR
    (initproc)EVP_tp_init, /* tp_init */
#endif
};

/* Shared back end of every constructor.
 *
 * Creates a HASH object named `name_obj` whose context is either a copy of
 * `initial_ctx` (when given) or freshly initialized for `digest`.  When `cp`
 * is non-NULL and `len` is nonzero, those bytes are hashed right away;
 * inputs of at least HASHLIB_GIL_MINSIZE bytes are hashed inside a
 * Py_BEGIN_ALLOW_THREADS section.
 * Raises ValueError if both `digest` and `initial_ctx` are NULL.
 * Returns a new reference, or NULL with an exception set. */
static PyObject *
EVPnew(PyObject *name_obj,
       const EVP_MD *digest, const EVP_MD_CTX *initial_ctx,
       const unsigned char *cp, Py_ssize_t len)
{
    EVPobject *obj;

    if (digest == NULL && initial_ctx == NULL) {
        PyErr_SetString(PyExc_ValueError, "unsupported hash type");
        return NULL;
    }

    obj = newEVPobject(name_obj);
    if (obj == NULL)
        return NULL;

    /* A prepared context, when available, takes precedence over digest. */
    if (initial_ctx != NULL)
        EVP_MD_CTX_copy(&obj->ctx, initial_ctx);
    else
        EVP_DigestInit(&obj->ctx, digest);

    if (cp != NULL && len != 0) {
        if (len < HASHLIB_GIL_MINSIZE) {
            EVP_hash(obj, cp, len);
        }
        else {
            Py_BEGIN_ALLOW_THREADS
            EVP_hash(obj, cp, len);
            Py_END_ALLOW_THREADS
        }
    }

    return (PyObject *)obj;
}


/* The module-level function: new() */

PyDoc_STRVAR(EVP_new__doc__,
"Return a new hash object using the named algorithm.\n\
An optional string argument may be provided and will be\n\
automatically hashed.\n\
\n\
The MD5 and SHA1 algorithms are always supported.\n");

/* Module-level new(name, string=None).
 *
 * `name` must be a str naming an OpenSSL digest; `string`, when given, is
 * read through the buffer protocol and hashed immediately.
 * Raises TypeError when `name` is not a string; an unknown digest name is
 * reported by EVPnew().  Returns a new HASH object or NULL. */
static PyObject *
EVP_new(PyObject *self, PyObject *args, PyObject *kwdict)
{
    static char *kwlist[] = {"name", "string", NULL};
    PyObject *algo_obj = NULL;
    PyObject *payload = NULL;
    Py_buffer buf = { 0 };
    char *algo_name;
    const EVP_MD *md;
    PyObject *result;

    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "O|O:new", kwlist,
                                     &algo_obj, &payload)) {
        return NULL;
    }

    if (!PyArg_Parse(algo_obj, "s", &algo_name)) {
        PyErr_SetString(PyExc_TypeError, "name must be a string");
        return NULL;
    }

    /* buf stays zeroed (NULL data, length 0) when no payload was passed */
    if (payload) {
        GET_BUFFER_VIEW_OR_ERROUT(payload, &buf);
    }

    md = EVP_get_digestbyname(algo_name);

    result = EVPnew(algo_obj, md, NULL, (unsigned char*)buf.buf, buf.len);

    if (payload) {
        PyBuffer_Release(&buf);
    }
    return result;
}

/*
 *  This macro generates constructor function definitions for specific
 *  hash algorithms.  These constructors are much faster than calling
 *  the generic one passing it a python string and are noticeably
 *  faster than calling a python new() wrapper.  That's important for
 *  code that wants to make hashes of a bunch of small strings.
 */
#define GEN_CONSTRUCTOR(NAME)  \
    static PyObject * \
    EVP_new_ ## NAME (PyObject *self, PyObject *args) \
    { \
        Py_buffer buf = { 0 }; \
        PyObject *payload = NULL; \
        PyObject *result; \
     \
        if (!PyArg_ParseTuple(args, "|O:" #NAME , &payload)) \
            return NULL; \
     \
        /* buf stays zeroed when no payload was passed */ \
        if (payload) { \
            GET_BUFFER_VIEW_OR_ERROUT(payload, &buf); \
        } \
     \
        /* copy the prepared context instead of looking up the digest */ \
        result = EVPnew( \
                    CONST_ ## NAME ## _name_obj, \
                    NULL, \
                    CONST_new_ ## NAME ## _ctx_p, \
                    (unsigned char*)buf.buf, \
                    buf.len); \
     \
        if (payload) { \
            PyBuffer_Release(&buf); \
        } \
        return result; \
    }

/* Expands to the PyMethodDef entry that exposes EVP_new_<NAME> as the
 * module function "openssl_<NAME>" (METH_VARARGS), with a generated
 * docstring. */
#define CONSTRUCTOR_METH_DEF(NAME)  \
    {"openssl_" #NAME, (PyCFunction)EVP_new_ ## NAME, METH_VARARGS, \
        PyDoc_STR("Returns a " #NAME \
                  " hash object; optionally initialized with a string") \
    }

/* Used in the init function to set up one constructor.
 *
 * Creates the name object for NAME and, when OpenSSL provides that digest,
 * points CONST_new_<NAME>_ctx_p at a context initialized for it; otherwise
 * the pointer stays NULL.  The digest is looked up once.  The expansion has
 * no trailing semicolon, so `INIT_CONSTRUCTOR_CONSTANTS(x);` is exactly one
 * statement.
 * NOTE(review): the result of PyUnicode_FromString() is not checked here. */
#define INIT_CONSTRUCTOR_CONSTANTS(NAME)  do { \
    const EVP_MD *hashlib_md = EVP_get_digestbyname(#NAME); \
    CONST_ ## NAME ## _name_obj = PyUnicode_FromString(#NAME); \
    if (hashlib_md != NULL) { \
        CONST_new_ ## NAME ## _ctx_p = &CONST_new_ ## NAME ## _ctx; \
        EVP_DigestInit(CONST_new_ ## NAME ## _ctx_p, hashlib_md); \
    } \
} while (0)

/* Define EVP_new_<name>() for each algorithm with a dedicated constructor. */
GEN_CONSTRUCTOR(md5)
GEN_CONSTRUCTOR(sha1)
GEN_CONSTRUCTOR(sha224)
GEN_CONSTRUCTOR(sha256)
GEN_CONSTRUCTOR(sha384)
GEN_CONSTRUCTOR(sha512)

/* List of functions exported by this module */

static struct PyMethodDef EVP_functions[] = {
    {"new", (PyCFunction)EVP_new, METH_VARARGS|METH_KEYWORDS, EVP_new__doc__},
    CONSTRUCTOR_METH_DEF(md5),
    CONSTRUCTOR_METH_DEF(sha1),
    CONSTRUCTOR_METH_DEF(sha224),
    CONSTRUCTOR_METH_DEF(sha256),
    CONSTRUCTOR_METH_DEF(sha384),
    CONSTRUCTOR_METH_DEF(sha512),
552
    {NULL, NULL}   /* Sentinel */
553 554 555 556 557
};


/* Initialize this module. */

558 559

static struct PyModuleDef _hashlibmodule = {
560 561 562 563 564 565 566 567 568
    PyModuleDef_HEAD_INIT,
    "_hashlib",
    NULL,
    -1,
    EVP_functions,
    NULL,
    NULL,
    NULL,
    NULL
569 570
};

571
PyMODINIT_FUNC
572
PyInit__hashlib(void)
573 574 575 576 577 578 579 580 581 582
{
    PyObject *m;

    OpenSSL_add_all_digests();

    /* TODO build EVP_functions openssl_* entries dynamically based
     * on what hashes are supported rather than listing many
     * but having some be unsupported.  Only init appropriate
     * constants. */

583
    Py_TYPE(&EVPtype) = &PyType_Type;
584
    if (PyType_Ready(&EVPtype) < 0)
585
        return NULL;
586

587
    m = PyModule_Create(&_hashlibmodule);
588
    if (m == NULL)
589
        return NULL;
590 591 592 593 594 595 596 597 598 599 600 601 602

#if HASH_OBJ_CONSTRUCTOR
    Py_INCREF(&EVPtype);
    PyModule_AddObject(m, "HASH", (PyObject *)&EVPtype);
#endif

    /* these constants are used by the convenience constructors */
    INIT_CONSTRUCTOR_CONSTANTS(md5);
    INIT_CONSTRUCTOR_CONSTANTS(sha1);
    INIT_CONSTRUCTOR_CONSTANTS(sha224);
    INIT_CONSTRUCTOR_CONSTANTS(sha256);
    INIT_CONSTRUCTOR_CONSTANTS(sha384);
    INIT_CONSTRUCTOR_CONSTANTS(sha512);
603
    return m;
604
}