1 module kaleidic.mongo_gridfs;
2 
3 import kaleidic.mongo_standalone;
4 
5 /++
6    IN:     mongo            - active connection to the MongoDB instance.
           gridfsBucketName - full path to the GridFS parent of collections .files and .chunks.
           id               - the file object ID, i.e., its "_id" value.
9    RETURN: The entire file contents in an array.
10    THROWS: File not found, or on corrupt properties.
11 +/
12 
ubyte[] gridfsReadFile(MongoConnection mongo, const string gridfsBucketName, const ObjectId id) {
    // Delegate to the batch variant with a single-element list, then pick
    // this id's contents back out of the resulting associative array.
    auto contentsById = gridfsReadManyFiles(mongo, gridfsBucketName, [id]);
    return contentsById[id];
}
16 
17 /++
18    IN:     mongo            - active connection to the MongoDB instance.
19            gridfsBucketName - full path to the GridFS parent of collections .files and .chunks.
20            name             - file name
21    RETURN: The entire file contents in an array.
22    THROWS: File not found, or if more than 1 file match name, or on corrupt properties.
23 +/
24 
ubyte[] gridfsReadFile(MongoConnection mongo, const string gridfsBucketName, const string name) {
    // Delegate to the batch variant with a single-element list, then pick
    // this file name's contents back out of the resulting associative array.
    auto contentsByName = gridfsReadManyFiles(mongo, gridfsBucketName, [name]);
    return contentsByName[name];
}
28 
29 /++
30    IN:     mongo            - active connection to the MongoDB instance.
31            gridfsBucketName - full path to the GridFS parent of collections .files and .chunks.
32            ids              - array of file object IDs, i.e., their "_id" values.
33    RETURN: An associative array containing id -> entire file content mappings.
34    THROWS: File not found, or on corrupt properties.
35 +/
36 
ubyte[][ObjectId] gridfsReadManyFiles(MongoConnection mongo, const string gridfsBucketName, const ObjectId[] ids) {
    import std.algorithm;
    import std.array;

    // MongoDB rejects "$or" with an empty array, so answer trivially here
    // instead of sending an invalid query.
    if (ids.length == 0)
        return null;

    if (ids.length > int.max)
        throw new Exception("Number of requested objects exceeds int.max");

    // Duplicate ids would make the found-count check below fail spuriously
    // (the server returns each matching document once), so query the
    // distinct set only. The result AA is keyed by id, so callers see no
    // difference.
    auto uniqueIds = ids.dup.sort().uniq().array();

    // {"$or": [{"_id": id1}, {"_id": id2}, ... {"_id": idN}]}
    auto query = document([bson_value("$or", makeArray(uniqueIds, "_id"))]);

    OP_REPLY reply = mongo.query(gridfsBucketName ~ ".files", 0, cast(int)uniqueIds.length, query);
    if (reply.errorCode != 0) {
        throw new Exception(reply.errorMessage);
    }

    // Fewer documents than distinct ids means at least one file is missing;
    // report exactly which ones via a sorted set difference.
    if (reply.documents.length != uniqueIds.length) {
        auto existingIds = reply.documents.map!(doc => doc["_id"].get!ObjectId).array.sort();
        auto missingIds = setDifference(uniqueIds, existingIds).map!(id => id.toString()).join(", ");
        throw new Exception("Requested file(s) not found. Missing id: " ~ missingIds);
    }

    return gridfsReadFilesById(mongo, gridfsBucketName, uniqueIds);
}
61 
62 /++
63    IN:     mongo            - active connection to the MongoDB instance.
64            gridfsBucketName - full path to the GridFS parent of collections .files and .chunks.
65            names            - array of file names.
66    RETURN: An associative array containing file name -> entire file content mappings.
67    THROWS: File not found, or if more than 1 file match name, or on corrupt properties.
68 +/
69 
ubyte[][string] gridfsReadManyFiles(MongoConnection mongo, const string gridfsBucketName, const string[] names) {
    import std.algorithm;
    import std.array;
    import std.typecons;

    // MongoDB rejects "$or" with an empty array, so answer trivially here
    // instead of sending an invalid query.
    if (names.length == 0)
        return null;

    // Duplicate names would make the found-count check below fail spuriously
    // (the server returns each matching document once), so query the
    // distinct set only. The result AA is keyed by name, so callers see no
    // difference.
    auto uniqueNames = names.dup.sort().uniq().array();

    // {"$or": [{"filename": name1}, {"filename": name2}, ... {"filename": nameN}]}
    auto query = document([bson_value("$or", makeArray(uniqueNames, "filename"))]);

    OP_REPLY reply = mongo.query(gridfsBucketName ~ ".files", 0, int.max, query);
    if (reply.errorCode != 0) {
        throw new Exception(reply.errorMessage);
    }

    // A count mismatch means either a missing file (fewer documents) or a
    // name matching several files (more documents) — both are documented
    // failure modes for this function.
    if (reply.documents.length != uniqueNames.length) {
        auto existingNames = reply.documents.map!(doc => doc["filename"].get!string).array().sort();
        auto missingNames = setDifference(uniqueNames, existingNames).join(", ");
        throw new Exception("Requested file(s) not found. Missing file(s): " ~ missingNames);
    }

    // Map each file's _id back to its name so the chunk contents (keyed by
    // files_id) can be re-keyed by filename on the way out.
    string[ObjectId] mappings = reply.documents
        .map!(doc => tuple(doc["_id"].get!ObjectId, doc["filename"].get!string))
        .assocArray();

    auto result = gridfsReadFilesById(mongo, gridfsBucketName, mappings.keys)
        .byPair()
        .map!(tup => tuple(mappings[tup[0]], tup[1]))
        .assocArray();

    return result;
}
101 
// Fetch every chunk document for the given file ids from <bucket>.chunks and
// reassemble them, per file, into a contiguous byte array keyed by file id.
// Throws if the server reports an error or a returned document carries "$err".
// NOTE(review): assumes the chunks collection is complete and consistent —
// gaps in chunk numbers "n" are not detected here; verify against callers.
private ubyte[][ObjectId] gridfsReadFilesById(MongoConnection mongo, const string gridfsBucketName,
                                    const ObjectId[] ids) {
    import std.outbuffer;
    import std.algorithm;
    import std.array;
    import std.typecons;

    // {"$or": [{"files_id": id1}, {"files_id": id2}, ... {"files_id": idN}]}
    auto query = bson_value("$query", document([bson_value("$or", makeArray(ids, "files_id"))]));
    // order results by files_id and chunk number
    auto orderBy = bson_value("$orderby", document([bson_value("files_id", 1), bson_value("n", 1)]));
    auto queryDoc = document([query, orderBy]);

    string collectionName = gridfsBucketName ~ ".chunks";
    // One growable buffer per file id; chunks are appended in "n" order
    // thanks to the $orderby above, so each buffer fills front-to-back.
    OutBuffer[ObjectId] result;

    // Shared reply processing for the initial query and each getMore batch.
    auto handleReply = (OP_REPLY reply) {
        if (reply.errorCode != 0) {
            throw new Exception(reply.errorMessage);
        }
        foreach (doc; reply.documents) {
            // A "$err" field in a returned document signals a server-side
            // query failure; surface it as an exception.
            if (doc["$err"] != bson_value.init) {
                throw new Exception(doc["$err"].get!string);
            }
            // require() creates the buffer on first sight of this files_id.
            auto contents = result.require(doc["files_id"].get!ObjectId, new OutBuffer());
            contents.write(doc["data"].get!(const(ubyte[])));
        }
    };

    OP_REPLY reply = mongo.query(collectionName, 0, int.max, queryDoc);
    handleReply(reply);

    // fetch remaining documents if any
    while (reply.cursorID != 0) {
        reply = mongo.getMore(collectionName, int.max, reply.cursorID);
        handleReply(reply);
    }

    // Flatten each OutBuffer into its final byte array.
    return result.byPair().map!(tup => tuple(tup[0], tup[1].toBytes())).assocArray();
}
142 
private bson_value[] makeArray(T)(T[] xs, string key) {
    import std.algorithm;
    import std.range;
    import std.conv;

    // BSON encodes arrays as documents whose keys are "0", "1", ... — build
    // one {key: value} sub-document per element under its index key.
    bson_value[] elements;
    elements.reserve(xs.length);
    foreach (i, x; xs)
        elements ~= bson_value(i.to!string, document([bson_value(key, x)]));
    return elements;
}