module kaleidic.mongo_gridfs;

import kaleidic.mongo_standalone;

/++
    IN: mongo - active connection to the MongoDB instance.
        gridfsBucketName - full path to the GridFS parent of collections .files and .chunks.
        id - the file object ID, i.e., its "_id" value.
    RETURN: The entire file contents in an array.
    THROWS: File not found, or on corrupt properties.
+/
ubyte[] gridfsReadFile(MongoConnection mongo, const string gridfsBucketName, const ObjectId id) {
    return gridfsReadManyFiles(mongo, gridfsBucketName, [id])[id];
}

/++
    IN: mongo - active connection to the MongoDB instance.
        gridfsBucketName - full path to the GridFS parent of collections .files and .chunks.
        name - file name.
    RETURN: The entire file contents in an array.
    THROWS: File not found, if more than one file matches the name, or on corrupt properties.
+/
ubyte[] gridfsReadFile(MongoConnection mongo, const string gridfsBucketName, const string name) {
    return gridfsReadManyFiles(mongo, gridfsBucketName, [name])[name];
}

/++
    IN: mongo - active connection to the MongoDB instance.
        gridfsBucketName - full path to the GridFS parent of collections .files and .chunks.
        ids - array of file object IDs, i.e., their "_id" values.
    RETURN: An associative array containing id -> entire file content mappings.
    THROWS: File not found, or on corrupt properties.
+/
ubyte[][ObjectId] gridfsReadManyFiles(MongoConnection mongo, const string gridfsBucketName, const ObjectId[] ids) {
    import std.algorithm;
    import std.array;

    if (ids.length > int.max)
        throw new Exception("Number of requested objects exceeds int.max");

    // {"$or": [{"_id": id1}, {"_id": id2}, ... {"_id": idN}]}
    auto query = document([bson_value("$or", makeArray(ids, "_id"))]);

    OP_REPLY reply = mongo.query(gridfsBucketName ~ ".files", 0, cast(int)ids.length, query);
    if (reply.errorCode != 0) {
        throw new Exception(reply.errorMessage);
    }

    if (reply.documents.length != ids.length) {
        auto givenIds = ids.dup.sort();
        auto existingIds = reply.documents.map!(doc => doc["_id"].get!ObjectId).array.sort();
        auto missingIds = setDifference(givenIds, existingIds).map!(id => id.toString()).join(", ");
        throw new Exception("Requested file(s) not found. Missing id: " ~ missingIds);
    }

    return gridfsReadFilesById(mongo, gridfsBucketName, ids);
}

/++
    IN: mongo - active connection to the MongoDB instance.
        gridfsBucketName - full path to the GridFS parent of collections .files and .chunks.
        names - array of file names.
    RETURN: An associative array containing file name -> entire file content mappings.
    THROWS: File not found, if more than one file matches a name, or on corrupt properties.
+/
ubyte[][string] gridfsReadManyFiles(MongoConnection mongo, const string gridfsBucketName, const string[] names) {
    import std.algorithm;
    import std.array;
    import std.typecons;

    // {"$or": [{"filename": name1}, {"filename": name2}, ... {"filename": nameN}]}
    auto query = document([bson_value("$or", makeArray(names, "filename"))]);

    OP_REPLY reply = mongo.query(gridfsBucketName ~ ".files", 0, int.max, query);
    if (reply.errorCode != 0) {
        throw new Exception(reply.errorMessage);
    }

    if (reply.documents.length != names.length) {
        auto givenNames = names.dup().sort();
        auto existingNames = reply.documents.map!(doc => doc["filename"].get!string).array().sort();
        auto missingNames = setDifference(givenNames, existingNames).join(", ");
        throw new Exception("Requested file(s) not found. Missing file(s): " ~ missingNames);
    }

    // Map each document's _id to its filename so chunk data can be keyed back to names.
    string[ObjectId] mappings = reply.documents
        .map!(doc => tuple(doc["_id"].get!ObjectId, doc["filename"].get!string))
        .assocArray();

    auto result = gridfsReadFilesById(mongo, gridfsBucketName, mappings.keys)
        .byPair()
        .map!(tup => tuple(mappings[tup[0]], tup[1]))
        .assocArray();

    return result;
}

/++
    Reads the .chunks documents for the given file IDs and reassembles each
    file's contents from its chunks, ordered by chunk number.
+/
private ubyte[][ObjectId] gridfsReadFilesById(MongoConnection mongo, const string gridfsBucketName,
        const ObjectId[] ids) {
    import std.outbuffer;
    import std.algorithm;
    import std.array;
    import std.typecons;

    // {"$or": [{"files_id": id1}, {"files_id": id2}, ... {"files_id": idN}]}
    auto query = bson_value("$query", document([bson_value("$or", makeArray(ids, "files_id"))]));
    // order results by files_id and chunk number
    auto orderBy = bson_value("$orderby", document([bson_value("files_id", 1), bson_value("n", 1)]));
    auto queryDoc = document([query, orderBy]);

    string collectionName = gridfsBucketName ~ ".chunks";
    OutBuffer[ObjectId] result;

    auto handleReply = (OP_REPLY reply) {
        if (reply.errorCode != 0) {
            throw new Exception(reply.errorMessage);
        }
        foreach (doc; reply.documents) {
            if (doc["$err"] != bson_value.init) {
                throw new Exception(doc["$err"].get!string);
            }
            // Append this chunk's payload to the buffer of its parent file.
            auto contents = result.require(doc["files_id"].get!ObjectId, new OutBuffer());
            contents.write(doc["data"].get!(const(ubyte[])));
        }
    };

    OP_REPLY reply = mongo.query(collectionName, 0, int.max, queryDoc);
    handleReply(reply);

    // fetch remaining documents if any
    while (reply.cursorID != 0) {
        reply = mongo.getMore(collectionName, int.max, reply.cursorID);
        handleReply(reply);
    }

    return result.byPair().map!(tup => tuple(tup[0], tup[1].toBytes())).assocArray();
}

/// Builds a BSON array of documents [{key: xs[0]}, {key: xs[1]}, ...] used as the operand of "$or".
private bson_value[] makeArray(T)(T[] xs, string key) {
    import std.algorithm;
    import std.range;
    import std.conv;

    return xs
        .enumerate()
        .map!(tup => bson_value(tup.index.to!string, document([bson_value(key, tup.value)])))
        .array();
}