Branch data Line data Source code
1 : : /* Debuginfo-over-http server.
2 : : Copyright (C) 2019-2024 Red Hat, Inc.
3 : : Copyright (C) 2021, 2022 Mark J. Wielaard <mark@klomp.org>
4 : : This file is part of elfutils.
5 : :
6 : : This file is free software; you can redistribute it and/or modify
7 : : it under the terms of the GNU General Public License as published by
8 : : the Free Software Foundation; either version 3 of the License, or
9 : : (at your option) any later version.
10 : :
11 : : elfutils is distributed in the hope that it will be useful, but
12 : : WITHOUT ANY WARRANTY; without even the implied warranty of
13 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : : GNU General Public License for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 : :
19 : :
20 : : /* cargo-cult from libdwfl linux-kernel-modules.c */
21 : : /* In case we have a bad fts we include this before config.h because it
22 : : can't handle _FILE_OFFSET_BITS.
23 : : Everything we need here is fine if its declarations just come first.
24 : : Also, include sys/types.h before fts. On some systems fts.h is not self
25 : : contained. */
26 : : #ifdef BAD_FTS
27 : : #include <sys/types.h>
28 : : #include <fts.h>
29 : : #endif
30 : :
31 : : #ifdef HAVE_CONFIG_H
32 : : #include "config.h"
33 : : #endif
34 : :
35 : : // #define _GNU_SOURCE
36 : : #ifdef HAVE_SCHED_H
37 : : extern "C" {
38 : : #include <sched.h>
39 : : }
40 : : #endif
41 : : #ifdef HAVE_SYS_RESOURCE_H
42 : : extern "C" {
43 : : #include <sys/resource.h>
44 : : }
45 : : #endif
46 : :
47 : : #ifdef HAVE_EXECINFO_H
48 : : extern "C" {
49 : : #include <execinfo.h>
50 : : }
51 : : #endif
52 : : #ifdef HAVE_MALLOC_H
53 : : extern "C" {
54 : : #include <malloc.h>
55 : : }
56 : : #endif
57 : :
58 : : #include "debuginfod.h"
59 : : #include <dwarf.h>
60 : :
61 : : #include <argp.h>
62 : : #ifdef __GNUC__
63 : : #undef __attribute__ /* glibc bug - rhbz 1763325 */
64 : : #endif
65 : :
66 : : #ifdef USE_LZMA
67 : : #include <lzma.h>
68 : : #endif
69 : :
70 : : #include <unistd.h>
71 : : #include <stdlib.h>
72 : : #include <locale.h>
73 : : #include <pthread.h>
74 : : #include <signal.h>
75 : : #include <sys/stat.h>
76 : : #include <sys/time.h>
77 : : #include <sys/vfs.h>
78 : : #include <unistd.h>
79 : : #include <fcntl.h>
80 : : #include <netdb.h>
81 : : #include <math.h>
82 : : #include <float.h>
83 : : #include <fnmatch.h>
84 : : #include <arpa/inet.h>
85 : :
86 : :
87 : : /* If fts.h is included before config.h, its indirect inclusions may not
88 : : give us the right LFS aliases of these functions, so map them manually. */
89 : : #ifdef BAD_FTS
90 : : #ifdef _FILE_OFFSET_BITS
91 : : #define open open64
92 : : #define fopen fopen64
93 : : #endif
94 : : #else
95 : : #include <sys/types.h>
96 : : #include <fts.h>
97 : : #endif
98 : :
99 : : #include <cstring>
100 : : #include <vector>
101 : : #include <set>
102 : : #include <unordered_set>
103 : : #include <map>
104 : : #include <string>
105 : : #include <iostream>
106 : : #include <iomanip>
107 : : #include <ostream>
108 : : #include <sstream>
109 : : #include <mutex>
110 : : #include <deque>
111 : : #include <condition_variable>
112 : : #include <exception>
113 : : #include <thread>
114 : : // #include <regex> // on rhel7 gcc 4.8, not competent
115 : : #include <regex.h>
116 : : // #include <algorithm>
117 : : using namespace std;
118 : :
119 : : #include <gelf.h>
120 : : #include <libdwelf.h>
121 : :
122 : : #include <microhttpd.h>
123 : :
124 : : #if MHD_VERSION >= 0x00097002
125 : : // libmicrohttpd 0.9.71 broke API
126 : : #define MHD_RESULT enum MHD_Result
127 : : #else
128 : : #define MHD_RESULT int
129 : : #endif
130 : :
131 : : #ifdef ENABLE_IMA_VERIFICATION
132 : : #include <rpm/rpmlib.h>
133 : : #include <rpm/rpmfi.h>
134 : : #include <rpm/header.h>
135 : : #include <glob.h>
136 : : #endif
137 : :
138 : : #include <curl/curl.h>
139 : : #include <archive.h>
140 : : #include <archive_entry.h>
141 : : #include <sqlite3.h>
142 : :
143 : : #ifdef __linux__
144 : : #include <sys/syscall.h>
145 : : #endif
146 : :
147 : : #ifdef __linux__
148 : : #define tid() syscall(SYS_gettid)
149 : : #else
150 : : #define tid() pthread_self()
151 : : #endif
152 : :
153 : : extern "C" {
154 : : #include "printversion.h"
155 : : #include "system.h"
156 : : }
157 : : #include <json-c/json.h>
158 : :
159 : :
160 : : inline bool
161 : 149363 : string_endswith(const string& haystack, const string& needle)
162 : : {
163 [ + + ]: 149363 : return (haystack.size() >= needle.size() &&
164 : 146169 : equal(haystack.end()-needle.size(), haystack.end(),
165 : 149363 : needle.begin()));
166 : : }
167 : :
168 : :
169 : : // Roll this identifier for every sqlite schema incompatibility.
170 : : #define BUILDIDS "buildids10"
171 : :
172 : : #if SQLITE_VERSION_NUMBER >= 3008000
173 : : #define WITHOUT_ROWID "without rowid"
174 : : #else
175 : : #define WITHOUT_ROWID ""
176 : : #endif
177 : :
178 : : static const char DEBUGINFOD_SQLITE_DDL[] =
179 : : "pragma foreign_keys = on;\n"
180 : : "pragma synchronous = 0;\n" // disable fsync()s - this cache is disposable across a machine crash
181 : : "pragma journal_mode = wal;\n" // https://sqlite.org/wal.html
182 : : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
183 : : "pragma journal_size_limit = 0;\n" // limit steady state file (between grooming, which also =truncate's)
184 : : "pragma auto_vacuum = incremental;\n" // https://sqlite.org/pragma.html
185 : : "pragma busy_timeout = 1000;\n" // https://sqlite.org/pragma.html
186 : : // NB: all these are overridable with -D option
187 : :
188 : : // Normalization table for interning file names
189 : : "create table if not exists " BUILDIDS "_fileparts (\n"
190 : : " id integer primary key not null,\n"
191 : : " name text unique not null\n"
192 : : " );\n"
193 : : "create table if not exists " BUILDIDS "_files (\n"
194 : : " id integer primary key not null,\n"
195 : : " dirname integer not null,\n"
196 : : " basename integer not null,\n"
197 : : " unique (dirname, basename),\n"
198 : : " foreign key (dirname) references " BUILDIDS "_fileparts(id) on delete cascade,\n"
199 : : " foreign key (basename) references " BUILDIDS "_fileparts(id) on delete cascade\n"
200 : : " );\n"
201 : : "create view if not exists " BUILDIDS "_files_v as\n" // a
202 : : " select f.id, n1.name || '/' || n2.name as name\n"
203 : : " from " BUILDIDS "_files f, " BUILDIDS "_fileparts n1, " BUILDIDS "_fileparts n2\n"
204 : : " where f.dirname = n1.id and f.basename = n2.id;\n"
205 : :
206 : : // Normalization table for interning buildids
207 : : "create table if not exists " BUILDIDS "_buildids (\n"
208 : : " id integer primary key not null,\n"
209 : : " hex text unique not null);\n"
210 : : // Track the completion of scanning of a given file & sourcetype at given time
211 : : "create table if not exists " BUILDIDS "_file_mtime_scanned (\n"
212 : : " mtime integer not null,\n"
213 : : " file integer not null,\n"
214 : : " size integer not null,\n" // in bytes
215 : : " sourcetype text(1) not null\n"
216 : : " check (sourcetype IN ('F', 'R')),\n"
217 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
218 : : " primary key (file, mtime, sourcetype)\n"
219 : : " ) " WITHOUT_ROWID ";\n"
220 : : "create table if not exists " BUILDIDS "_f_de (\n"
221 : : " buildid integer not null,\n"
222 : : " debuginfo_p integer not null,\n"
223 : : " executable_p integer not null,\n"
224 : : " file integer not null,\n"
225 : : " mtime integer not null,\n"
226 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
227 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
228 : : " primary key (buildid, file, mtime)\n"
229 : : " ) " WITHOUT_ROWID ";\n"
230 : : // Index for faster delete by file identifier and metadata searches
231 : : "create index if not exists " BUILDIDS "_f_de_idx on " BUILDIDS "_f_de (file, mtime);\n"
232 : : "create table if not exists " BUILDIDS "_f_s (\n"
233 : : " buildid integer not null,\n"
234 : : " artifactsrc integer not null,\n"
235 : : " file integer not null,\n" // NB: not necessarily entered into _mtime_scanned
236 : : " mtime integer not null,\n"
237 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
238 : : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
239 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
240 : : " primary key (buildid, artifactsrc, file, mtime)\n"
241 : : " ) " WITHOUT_ROWID ";\n"
242 : : "create table if not exists " BUILDIDS "_r_de (\n"
243 : : " buildid integer not null,\n"
244 : : " debuginfo_p integer not null,\n"
245 : : " executable_p integer not null,\n"
246 : : " file integer not null,\n"
247 : : " mtime integer not null,\n"
248 : : " content integer not null,\n"
249 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
250 : : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
251 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
252 : : " primary key (buildid, debuginfo_p, executable_p, file, content, mtime)\n"
253 : : " ) " WITHOUT_ROWID ";\n"
254 : : // Index for faster delete by archive file identifier
255 : : "create index if not exists " BUILDIDS "_r_de_idx on " BUILDIDS "_r_de (file, mtime);\n"
256 : : // Index for metadata searches
257 : : "create index if not exists " BUILDIDS "_r_de_idx2 on " BUILDIDS "_r_de (content);\n"
258 : : "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
259 : : " buildid integer not null,\n"
260 : : " artifactsrc integer not null,\n"
261 : : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
262 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
263 : : " primary key (buildid, artifactsrc)\n"
264 : : " ) " WITHOUT_ROWID ";\n"
265 : : "create table if not exists " BUILDIDS "_r_sdef (\n" // rpm contents that may satisfy sref
266 : : " file integer not null,\n"
267 : : " mtime integer not null,\n"
268 : : " content integer not null,\n"
269 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
270 : : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
271 : : " primary key (content, file, mtime)\n"
272 : : " ) " WITHOUT_ROWID ";\n"
273 : : "create table if not exists " BUILDIDS "_r_seekable (\n" // seekable rpm contents
274 : : " file integer not null,\n"
275 : : " content integer not null,\n"
276 : : " type text not null,\n"
277 : : " size integer not null,\n"
278 : : " offset integer not null,\n"
279 : : " mtime integer not null,\n"
280 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
281 : : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
282 : : " primary key (file, content)\n"
283 : : " ) " WITHOUT_ROWID ";\n"
284 : : // create views to glue together some of the above tables, for webapi D queries
285 : : // NB: _query_d2 and _query_e2 were added to replace _query_d and _query_e
286 : : // without updating BUILDIDS. They can be renamed back the next time BUILDIDS
287 : : // is updated.
288 : : "create view if not exists " BUILDIDS "_query_d2 as \n"
289 : : "select\n"
290 : : " b.hex as buildid, 'F' as sourcetype, n.file as id0, f0.name as source0, n.mtime as mtime, null as id1, null as source1\n"
291 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_f_de n\n"
292 : : " where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n"
293 : : "union all select\n"
294 : : " b.hex as buildid, 'R' as sourcetype, n.file as id0, f0.name as source0, n.mtime as mtime, n.content as id1, f1.name as source1\n"
295 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_r_de n\n"
296 : : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n"
297 : : ";"
298 : : // ... and for E queries
299 : : "create view if not exists " BUILDIDS "_query_e2 as \n"
300 : : "select\n"
301 : : " b.hex as buildid, 'F' as sourcetype, n.file as id0, f0.name as source0, n.mtime as mtime, null as id1, null as source1\n"
302 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_f_de n\n"
303 : : " where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n"
304 : : "union all select\n"
305 : : " b.hex as buildid, 'R' as sourcetype, n.file as id0, f0.name as source0, n.mtime as mtime, n.content as id1, f1.name as source1\n"
306 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_r_de n\n"
307 : : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n"
308 : : ";"
309 : : // ... and for S queries
310 : : "create view if not exists " BUILDIDS "_query_s as \n"
311 : : "select\n"
312 : : " b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n"
313 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v fs, " BUILDIDS "_f_s n\n"
314 : : " where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n"
315 : : "union all select\n"
316 : : " b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n"
317 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_files_v fsref, "
318 : : " " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n"
319 : : " where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n"
320 : : " and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n"
321 : : ";"
322 : : // and for startup overview counts
323 : : "drop view if exists " BUILDIDS "_stats;\n"
324 : : "create view if not exists " BUILDIDS "_stats as\n"
325 : : " select 'file d/e' as label,count(*) as quantity from " BUILDIDS "_f_de\n"
326 : : "union all select 'file s',count(*) from " BUILDIDS "_f_s\n"
327 : : "union all select 'archive d/e',count(*) from " BUILDIDS "_r_de\n"
328 : : "union all select 'archive sref',count(*) from " BUILDIDS "_r_sref\n"
329 : : "union all select 'archive sdef',count(*) from " BUILDIDS "_r_sdef\n"
330 : : "union all select 'buildids',count(*) from " BUILDIDS "_buildids\n"
331 : : "union all select 'filenames',count(*) from " BUILDIDS "_files\n"
332 : : "union all select 'fileparts',count(*) from " BUILDIDS "_fileparts\n"
333 : : "union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n"
334 : : "union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n"
335 : : #if SQLITE_VERSION_NUMBER >= 3016000
336 : : "union all select 'index db size (mb)',page_count*page_size/1024/1024 as size FROM pragma_page_count(), pragma_page_size()\n"
337 : : #endif
338 : : ";\n"
339 : :
340 : : // schema change history & garbage collection
341 : : //
342 : : // XXX: we could have migration queries here to bring prior-schema
343 : : // data over instead of just dropping it. But that could incur
344 : : // doubled storage costs.
345 : : //
346 : : // buildids10: split the _files table into _parts
347 : : "" // <<< we are here
348 : : // buildids9: widen the mtime_scanned table
349 : : "DROP VIEW IF EXISTS buildids9_stats;\n"
350 : : "DROP INDEX IF EXISTS buildids9_r_de_idx;\n"
351 : : "DROP INDEX IF EXISTS buildids9_f_de_idx;\n"
352 : : "DROP VIEW IF EXISTS buildids9_query_s;\n"
353 : : "DROP VIEW IF EXISTS buildids9_query_e;\n"
354 : : "DROP VIEW IF EXISTS buildids9_query_d;\n"
355 : : "DROP TABLE IF EXISTS buildids9_r_sdef;\n"
356 : : "DROP TABLE IF EXISTS buildids9_r_sref;\n"
357 : : "DROP TABLE IF EXISTS buildids9_r_de;\n"
358 : : "DROP TABLE IF EXISTS buildids9_f_s;\n"
359 : : "DROP TABLE IF EXISTS buildids9_f_de;\n"
360 : : "DROP TABLE IF EXISTS buildids9_file_mtime_scanned;\n"
361 : : "DROP TABLE IF EXISTS buildids9_buildids;\n"
362 : : "DROP TABLE IF EXISTS buildids9_files;\n"
363 : : // buildids8: slim the sref table
364 : : "drop table if exists buildids8_f_de;\n"
365 : : "drop table if exists buildids8_f_s;\n"
366 : : "drop table if exists buildids8_r_de;\n"
367 : : "drop table if exists buildids8_r_sref;\n"
368 : : "drop table if exists buildids8_r_sdef;\n"
369 : : "drop table if exists buildids8_file_mtime_scanned;\n"
370 : : "drop table if exists buildids8_files;\n"
371 : : "drop table if exists buildids8_buildids;\n"
372 : : // buildids7: separate _norm table into dense subtype tables
373 : : "drop table if exists buildids7_f_de;\n"
374 : : "drop table if exists buildids7_f_s;\n"
375 : : "drop table if exists buildids7_r_de;\n"
376 : : "drop table if exists buildids7_r_sref;\n"
377 : : "drop table if exists buildids7_r_sdef;\n"
378 : : "drop table if exists buildids7_file_mtime_scanned;\n"
379 : : "drop table if exists buildids7_files;\n"
380 : : "drop table if exists buildids7_buildids;\n"
381 : : // buildids6: drop bolo/rfolo again, represent sources / rpmcontents in main table
382 : : "drop table if exists buildids6_norm;\n"
383 : : "drop table if exists buildids6_files;\n"
384 : : "drop table if exists buildids6_buildids;\n"
385 : : "drop view if exists buildids6;\n"
386 : : // buildids5: redefine srcfile1 column to be '.'-less (for rpms)
387 : : "drop table if exists buildids5_norm;\n"
388 : : "drop table if exists buildids5_files;\n"
389 : : "drop table if exists buildids5_buildids;\n"
390 : : "drop table if exists buildids5_bolo;\n"
391 : : "drop table if exists buildids5_rfolo;\n"
392 : : "drop view if exists buildids5;\n"
393 : : // buildids4: introduce rpmfile RFOLO
394 : : "drop table if exists buildids4_norm;\n"
395 : : "drop table if exists buildids4_files;\n"
396 : : "drop table if exists buildids4_buildids;\n"
397 : : "drop table if exists buildids4_bolo;\n"
398 : : "drop table if exists buildids4_rfolo;\n"
399 : : "drop view if exists buildids4;\n"
400 : : // buildids3*: split out srcfile BOLO
401 : : "drop table if exists buildids3_norm;\n"
402 : : "drop table if exists buildids3_files;\n"
403 : : "drop table if exists buildids3_buildids;\n"
404 : : "drop table if exists buildids3_bolo;\n"
405 : : "drop view if exists buildids3;\n"
406 : : // buildids2: normalized buildid and filenames into interning tables;
407 : : "drop table if exists buildids2_norm;\n"
408 : : "drop table if exists buildids2_files;\n"
409 : : "drop table if exists buildids2_buildids;\n"
410 : : "drop view if exists buildids2;\n"
411 : : // buildids1: made buildid and artifacttype NULLable, to represent cached-negative
412 : : // lookups from sources, e.g. files or rpms that contain no buildid-indexable content
413 : : "drop table if exists buildids1;\n"
414 : : // buildids: original
415 : : "drop table if exists buildids;\n"
416 : : ;
417 : :
418 : : static const char DEBUGINFOD_SQLITE_CLEANUP_DDL[] =
419 : : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
420 : : ;
421 : :
422 : :
423 : :
424 : :
425 : : /* Name and version of program. */
426 : : ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
427 : :
428 : : /* Bug report address. */
429 : : ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
430 : :
431 : : /* Definitions of arguments for argp functions. */
432 : : static const struct argp_option options[] =
433 : : {
434 : : { NULL, 0, NULL, 0, "Scanners:", 1 },
435 : : { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning.", 0 },
436 : : { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning.", 0 },
437 : : { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning.", 0 },
438 : : { "scan-archive", 'Z', "EXT=CMD", 0, "Enable arbitrary archive scanning.", 0 },
439 : : // "source-oci-imageregistry" ...
440 : :
441 : : { NULL, 0, NULL, 0, "Options:", 2 },
442 : : { "logical", 'L', NULL, 0, "Follow symlinks, default=ignore.", 0 },
443 : : { "max-depth", 'M', "LEVELS", 0, "Depth of directory levels to descend into, default=no-limit.", 0 },
444 : : { "rescan-time", 't', "SECONDS", 0, "Number of seconds to wait between rescans, 0=disable.", 0 },
445 : : { "groom-time", 'g', "SECONDS", 0, "Number of seconds to wait between database grooming, 0=disable.", 0 },
446 : : { "maxigroom", 'G', NULL, 0, "Run a complete database groom/shrink pass at startup.", 0 },
447 : : { "concurrency", 'c', "NUM", 0, "Limit scanning thread concurrency to NUM, default=#CPUs.", 0 },
448 : : { "connection-pool", 'C', "NUM", OPTION_ARG_OPTIONAL,
449 : : "Use webapi connection pool with NUM threads, default=unlim.", 0 },
450 : : { "include", 'I', "REGEX", 0, "Include files matching REGEX, default=all.", 0 },
451 : : { "exclude", 'X', "REGEX", 0, "Exclude files matching REGEX, default=none.", 0 },
452 : : { "port", 'p', "NUM", 0, "HTTP port to listen on, default 8002.", 0 },
453 : : #define ARGP_KEY_CORS 0x1000
454 : : { "cors", ARGP_KEY_CORS, NULL, 0, "Add CORS response headers to HTTP queries, default no.", 0 },
455 : : { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
456 : : { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
457 : : { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
458 : : { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X arguments to groom the database.",0},
459 : : #define ARGP_KEY_FDCACHE_FDS 0x1001
460 : : { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", OPTION_HIDDEN, NULL, 0 },
461 : : #define ARGP_KEY_FDCACHE_MBS 0x1002
462 : : { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
463 : : #define ARGP_KEY_FDCACHE_PREFETCH 0x1003
464 : : { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
465 : : #define ARGP_KEY_FDCACHE_MINTMP 0x1004
466 : : { "fdcache-mintmp", ARGP_KEY_FDCACHE_MINTMP, "NUM", 0, "Minimum free space% on tmpdir.", 0 },
467 : : #define ARGP_KEY_FDCACHE_PREFETCH_MBS 0x1005
468 : : { "fdcache-prefetch-mbs", ARGP_KEY_FDCACHE_PREFETCH_MBS, "MB", OPTION_HIDDEN, NULL, 0},
469 : : #define ARGP_KEY_FDCACHE_PREFETCH_FDS 0x1006
470 : : { "fdcache-prefetch-fds", ARGP_KEY_FDCACHE_PREFETCH_FDS, "NUM", OPTION_HIDDEN, NULL, 0},
471 : : #define ARGP_KEY_FORWARDED_TTL_LIMIT 0x1007
472 : : {"forwarded-ttl-limit", ARGP_KEY_FORWARDED_TTL_LIMIT, "NUM", 0, "Limit of X-Forwarded-For hops, default 8.", 0},
473 : : #define ARGP_KEY_PASSIVE 0x1008
474 : : { "passive", ARGP_KEY_PASSIVE, NULL, 0, "Do not scan or groom, read-only database.", 0 },
475 : : #define ARGP_KEY_DISABLE_SOURCE_SCAN 0x1009
476 : : { "disable-source-scan", ARGP_KEY_DISABLE_SOURCE_SCAN, NULL, 0, "Do not scan dwarf source info.", 0 },
477 : : #define ARGP_SCAN_CHECKPOINT 0x100A
478 : : { "scan-checkpoint", ARGP_SCAN_CHECKPOINT, "NUM", 0, "Number of files scanned before a WAL checkpoint.", 0 },
479 : : #ifdef ENABLE_IMA_VERIFICATION
480 : : #define ARGP_KEY_KOJI_SIGCACHE 0x100B
481 : : { "koji-sigcache", ARGP_KEY_KOJI_SIGCACHE, NULL, 0, "Do a koji specific mapping of rpm paths to get IMA signatures.", 0 },
482 : : #endif
483 : : #define ARGP_KEY_METADATA_MAXTIME 0x100C
484 : : { "metadata-maxtime", ARGP_KEY_METADATA_MAXTIME, "SECONDS", 0,
485 : : "Number of seconds to limit metadata query run time, 0=unlimited.", 0 },
486 : : #define ARGP_KEY_HTTP_ADDR 0x100D
487 : : { "listen-address", ARGP_KEY_HTTP_ADDR, "ADDR", 0, "HTTP address to listen on.", 0 },
488 : : { "home-redirect", 'h', "URL", 0, "Custom homepage - redirect.", 0 },
489 : : { "home-html", 'H', "FILE", 0, "Custom homepage - htmlfile.", 0 },
490 : : { NULL, 0, NULL, 0, NULL, 0 },
491 : : };
492 : :
493 : : /* Short description of program. */
494 : : static const char doc[] = "Serve debuginfo-related content across HTTP from files under PATHs.";
495 : :
496 : : /* Strings for arguments in help texts. */
497 : : static const char args_doc[] = "[PATH ...]";
498 : :
499 : : /* Prototype for option handler. */
500 : : static error_t parse_opt (int key, char *arg, struct argp_state *state);
501 : :
502 : : static unsigned default_concurrency();
503 : :
504 : : /* Data structure to communicate with argp functions. */
505 : : static struct argp argp =
506 : : {
507 : : options, parse_opt, args_doc, doc, NULL, NULL, NULL
508 : : };
509 : :
510 : :
511 : : static string db_path;
512 : : static sqlite3 *db; // single connection, serialized across all our threads!
513 : : static sqlite3 *dbq; // webapi query-servicing readonly connection, serialized ditto!
514 : : static unsigned verbose;
515 : : static volatile sig_atomic_t interrupted = 0;
516 : : static volatile sig_atomic_t forced_rescan_count = 0;
517 : : static volatile sig_atomic_t sigusr1 = 0;
518 : : static volatile sig_atomic_t forced_groom_count = 0;
519 : : static volatile sig_atomic_t sigusr2 = 0;
520 : : static unsigned http_port = 8002;
521 : : static struct sockaddr_in6 http_sockaddr;
522 : : static string addr_info = "";
523 : : static bool webapi_cors = false;
524 : : static int max_depth = -1;
525 : : static unsigned rescan_s = 300;
526 : : static unsigned groom_s = 86400;
527 : : static bool maxigroom = false;
528 : : static unsigned concurrency = default_concurrency();
529 : : static int connection_pool = 0;
530 : : static set<string> source_paths;
531 : : static bool scan_files = false;
532 : : static map<string,string> scan_archives;
533 : : static vector<string> extra_ddl;
534 : : static regex_t file_include_regex;
535 : : static regex_t file_exclude_regex;
536 : : static bool regex_groom = false;
537 : : static bool traverse_logical;
538 : : static long fdcache_mbs;
539 : : static long fdcache_prefetch;
540 : : static long fdcache_mintmp;
541 : : static unsigned forwarded_ttl_limit = 8;
542 : : static bool scan_source_info = true;
543 : : static string tmpdir;
544 : : static bool passive_p = false;
545 : : static long scan_checkpoint = 256;
546 : : #ifdef ENABLE_IMA_VERIFICATION
547 : : static bool requires_koji_sigcache_mapping = false;
548 : : #endif
549 : : static unsigned metadata_maxtime_s = 5;
550 : : static string cust_homepage_redirect = "";
551 : : static string cust_homepage_file = "";
552 : :
553 : : static void set_metric(const string& key, double value);
554 : : static void inc_metric(const string& key);
555 : : static void add_metric(const string& metric,
556 : : double value);
557 : : static void set_metric(const string& metric,
558 : : const string& lname, const string& lvalue,
559 : : double value);
560 : : static void inc_metric(const string& metric,
561 : : const string& lname, const string& lvalue);
562 : : static void add_metric(const string& metric,
563 : : const string& lname, const string& lvalue,
564 : : double value);
565 : : static void inc_metric(const string& metric,
566 : : const string& lname, const string& lvalue,
567 : : const string& rname, const string& rvalue);
568 : : static void add_metric(const string& metric,
569 : : const string& lname, const string& lvalue,
570 : : const string& rname, const string& rvalue,
571 : : double value);
572 : :
573 : :
574 : : class tmp_inc_metric { // a RAII style wrapper for exception-safe scoped increment & decrement
575 : : string m, n, v;
576 : : public:
577 : 3383 : tmp_inc_metric(const string& mname, const string& lname, const string& lvalue):
578 [ + - + - ]: 3383 : m(mname), n(lname), v(lvalue)
579 : : {
580 [ + - ]: 3383 : add_metric (m, n, v, 1);
581 : 3383 : }
582 : 3383 : ~tmp_inc_metric()
583 : : {
584 : 3383 : add_metric (m, n, v, -1);
585 : 3383 : }
586 : : };
587 : :
588 : : class tmp_ms_metric { // a RAII style wrapper for exception-safe scoped timing
589 : : string m, n, v;
590 : : struct timespec ts_start;
591 : : public:
592 : 375326 : tmp_ms_metric(const string& mname, const string& lname, const string& lvalue):
593 [ + - + - ]: 375326 : m(mname), n(lname), v(lvalue)
594 : : {
595 : 375332 : clock_gettime (CLOCK_MONOTONIC, & ts_start);
596 : 375699 : }
597 : 376672 : ~tmp_ms_metric()
598 : : {
599 : 376672 : struct timespec ts_end;
600 : 376672 : clock_gettime (CLOCK_MONOTONIC, & ts_end);
601 : 376857 : double deltas = (ts_end.tv_sec - ts_start.tv_sec)
602 : 376857 : + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
603 : :
604 : 376857 : add_metric (m + "_milliseconds_sum", n, v, (deltas*1000.0));
605 : 377029 : inc_metric (m + "_milliseconds_count", n, v);
606 : 376978 : }
607 : : };
608 : :
609 : :
610 : : /* Handle program arguments. */
611 : : static error_t
612 : 1276 : parse_opt (int key, char *arg,
613 : : struct argp_state *state __attribute__ ((unused)))
614 : : {
615 : 1276 : int rc;
616 [ + + + + : 1276 : switch (key)
+ + + + +
+ - - + +
- - + + +
+ + + + +
+ + - + -
- + + + ]
617 : : {
618 : 308 : case 'v': verbose ++; break;
619 : 80 : case 'd':
620 : : /* When using the in-memory database make sure it is shareable,
621 : : so we can open it twice as read/write and read-only. */
622 [ + + ]: 80 : if (strcmp (arg, ":memory:") == 0)
623 : 14 : db_path = "file::memory:?cache=shared";
624 : : else
625 : 66 : db_path = string(arg);
626 : : break;
627 : 84 : case 'p': http_port = (unsigned) atoi(arg);
628 [ + - ]: 84 : if (http_port == 0 || http_port > 65535)
629 : 0 : argp_failure(state, 1, EINVAL, "port number");
630 : : break;
631 : 6 : case ARGP_KEY_CORS:
632 : 6 : webapi_cors = true;
633 : 6 : break;
634 : 50 : case 'F': scan_files = true; break;
635 : 24 : case 'R':
636 [ + - + - ]: 24 : scan_archives[".rpm"]="cat"; // libarchive groks rpm natively
637 : 24 : break;
638 : 18 : case 'U':
639 [ + - + - ]: 18 : scan_archives[".deb"]="(bsdtar -O -x -f - data.tar\\*)<";
640 [ + - + - ]: 18 : scan_archives[".ddeb"]="(bsdtar -O -x -f - data.tar\\*)<";
641 [ + - + - ]: 18 : scan_archives[".ipk"]="(bsdtar -O -x -f - data.tar\\*)<";
642 : : // .udeb too?
643 : 18 : break;
644 : 40 : case 'Z':
645 : 40 : {
646 [ - + ]: 40 : char* extension = strchr(arg, '=');
647 [ - + ]: 40 : if (arg[0] == '\0')
648 : 0 : argp_failure(state, 1, EINVAL, "missing EXT");
649 [ + + ]: 40 : else if (extension)
650 [ + - + - ]: 20 : scan_archives[string(arg, (extension-arg))]=string(extension+1);
651 : : else
652 [ + - + - ]: 20 : scan_archives[string(arg)]=string("cat");
653 : : }
654 : : break;
655 : 8 : case 'L':
656 [ - + ]: 8 : if (passive_p)
657 : 0 : argp_failure(state, 1, EINVAL, "-L option inconsistent with passive mode");
658 : 8 : traverse_logical = true;
659 : 8 : break;
660 : 0 : case 'D':
661 [ # # ]: 0 : if (passive_p)
662 : 0 : argp_failure(state, 1, EINVAL, "-D option inconsistent with passive mode");
663 : 0 : extra_ddl.push_back(string(arg));
664 : 0 : break;
665 : 0 : case 'M':
666 [ # # ]: 0 : if (passive_p)
667 : 0 : argp_failure(state, 1, EINVAL, "-M option inconsistent with passive mode");
668 : 0 : max_depth = atoi(arg);
669 [ # # ]: 0 : if (max_depth < 0)
670 : 0 : argp_failure(state, 1, EINVAL, "-M LEVELS needs to be at least 0");
671 : : break;
672 : 64 : case 't':
673 [ - + ]: 64 : if (passive_p)
674 : 0 : argp_failure(state, 1, EINVAL, "-t option inconsistent with passive mode");
675 : 64 : rescan_s = (unsigned) atoi(arg);
676 : 64 : break;
677 : 64 : case 'g':
678 [ - + ]: 64 : if (passive_p)
679 : 0 : argp_failure(state, 1, EINVAL, "-g option inconsistent with passive mode");
680 : 64 : groom_s = (unsigned) atoi(arg);
681 : 64 : break;
682 : 0 : case 'G':
683 [ # # ]: 0 : if (passive_p)
684 : 0 : argp_failure(state, 1, EINVAL, "-G option inconsistent with passive mode");
685 : 0 : maxigroom = true;
686 : 0 : break;
687 : 0 : case 'c':
688 [ # # ]: 0 : if (passive_p)
689 : 0 : argp_failure(state, 1, EINVAL, "-c option inconsistent with passive mode");
690 : 0 : concurrency = (unsigned) atoi(arg);
691 [ # # ]: 0 : if (concurrency < 1) concurrency = 1;
692 : : break;
693 : 6 : case 'C':
694 [ + + ]: 6 : if (arg)
695 : : {
696 : 4 : connection_pool = atoi(arg);
697 [ + - ]: 4 : if (connection_pool < 2)
698 : 0 : argp_failure(state, 1, EINVAL, "-C NUM minimum 2");
699 : : }
700 : : break;
701 : 4 : case 'I':
702 : : // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
703 [ - + ]: 4 : if (passive_p)
704 : 0 : argp_failure(state, 1, EINVAL, "-I option inconsistent with passive mode");
705 : 4 : regfree (&file_include_regex);
706 : 4 : rc = regcomp (&file_include_regex, arg, REG_EXTENDED|REG_NOSUB);
707 [ + - ]: 4 : if (rc != 0)
708 : 0 : argp_failure(state, 1, EINVAL, "regular expression");
709 : : break;
710 : 6 : case 'X':
711 [ - + ]: 6 : if (passive_p)
712 : 0 : argp_failure(state, 1, EINVAL, "-X option inconsistent with passive mode");
713 : 6 : regfree (&file_exclude_regex);
714 : 6 : rc = regcomp (&file_exclude_regex, arg, REG_EXTENDED|REG_NOSUB);
715 [ + - ]: 6 : if (rc != 0)
716 : 0 : argp_failure(state, 1, EINVAL, "regular expression");
717 : : break;
718 : 4 : case 'r':
719 [ - + ]: 4 : if (passive_p)
720 : 0 : argp_failure(state, 1, EINVAL, "-r option inconsistent with passive mode");
721 : 4 : regex_groom = true;
722 : 4 : break;
723 : : case ARGP_KEY_FDCACHE_FDS:
724 : : // deprecated
725 : : break;
726 : 4 : case ARGP_KEY_FDCACHE_MBS:
727 : 4 : fdcache_mbs = atol (arg);
728 : 4 : break;
729 : 4 : case ARGP_KEY_FDCACHE_PREFETCH:
730 : 4 : fdcache_prefetch = atol (arg);
731 : 4 : break;
732 : 4 : case ARGP_KEY_FDCACHE_MINTMP:
733 : 4 : fdcache_mintmp = atol (arg);
734 [ + - ]: 4 : if( fdcache_mintmp > 100 || fdcache_mintmp < 0 )
735 : 0 : argp_failure(state, 1, EINVAL, "fdcache mintmp percent");
736 : : break;
737 : 4 : case ARGP_KEY_FORWARDED_TTL_LIMIT:
738 : 4 : forwarded_ttl_limit = (unsigned) atoi(arg);
739 : 4 : break;
740 : 110 : case ARGP_KEY_ARG:
741 [ + - ]: 110 : source_paths.insert(string(arg));
742 : 110 : break;
743 : : case ARGP_KEY_FDCACHE_PREFETCH_FDS:
744 : : // deprecated
745 : : break;
746 : : case ARGP_KEY_FDCACHE_PREFETCH_MBS:
747 : : // deprecated
748 : : break;
749 : 2 : case ARGP_KEY_PASSIVE:
750 : 2 : passive_p = true;
751 [ + - ]: 2 : if (source_paths.size() > 0
752 [ + - ]: 2 : || maxigroom
753 [ + - ]: 2 : || extra_ddl.size() > 0
754 [ + - + - ]: 4 : || traverse_logical)
755 : : // other conflicting options tricky to check
756 : 0 : argp_failure(state, 1, EINVAL, "inconsistent options with passive mode");
757 : : break;
758 : 0 : case ARGP_KEY_DISABLE_SOURCE_SCAN:
759 : 0 : scan_source_info = false;
760 : 0 : break;
761 : 2 : case ARGP_SCAN_CHECKPOINT:
762 : 2 : scan_checkpoint = atol (arg);
763 [ + - ]: 2 : if (scan_checkpoint < 0)
764 : 0 : argp_failure(state, 1, EINVAL, "scan checkpoint");
765 : : break;
766 : 0 : case ARGP_KEY_METADATA_MAXTIME:
767 : 0 : metadata_maxtime_s = (unsigned) atoi(arg);
768 : 0 : break;
769 : : #ifdef ENABLE_IMA_VERIFICATION
770 : : case ARGP_KEY_KOJI_SIGCACHE:
771 : : requires_koji_sigcache_mapping = true;
772 : : break;
773 : : #endif
774 : 0 : case ARGP_KEY_HTTP_ADDR:
775 [ # # ]: 0 : if (inet_pton(AF_INET, arg, &(((sockaddr_in*)&http_sockaddr)->sin_addr)) == 1)
776 : 0 : http_sockaddr.sin6_family = AF_INET;
777 : : else
778 [ # # ]: 0 : if (inet_pton(AF_INET6, arg, &http_sockaddr.sin6_addr) == 1)
779 : 0 : http_sockaddr.sin6_family = AF_INET6;
780 : : else
781 : 0 : argp_failure(state, 1, EINVAL, "listen-address");
782 : 0 : addr_info = arg;
783 : 0 : break;
784 : : // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
785 : 4 : case 'h':
786 : 4 : cust_homepage_redirect = arg;
787 : 4 : break;
788 : 4 : case 'H':
789 : 4 : cust_homepage_file = arg;
790 : 4 : break;
791 : : default: return ARGP_ERR_UNKNOWN;
792 : : }
793 : :
794 : : return 0;
795 : : }
796 : :
797 : :
798 : : ////////////////////////////////////////////////////////////////////////
799 : :
800 : :
801 : : static void add_mhd_response_header (struct MHD_Response *r,
802 : : const char *h, const char *v);
803 : :
804 : : // represent errors that may get reported to an ostream and/or a libmicrohttpd connection
805 : :
806 : 8 : struct reportable_exception
807 : : {
808 : : int code;
809 : : string message;
810 : :
811 [ - - + - : 106 : reportable_exception(int c, const string& m): code(c), message(m) {}
- - + - +
- ]
812 [ - - - - : 602 : reportable_exception(const string& m): code(503), message(m) {}
- - - - -
- - - - -
+ - - - -
- + - - -
- - - - -
- - - - -
- - - - -
- - - - -
- - + - -
- ]
813 : : reportable_exception(): code(503), message() {}
814 : :
815 : : void report(ostream& o) const; // defined under obatched() class below
816 : :
817 : 630 : MHD_RESULT mhd_send_response(MHD_Connection* c) const {
818 : 1260 : MHD_Response* r = MHD_create_response_from_buffer (message.size(),
819 : 630 : (void*) message.c_str(),
820 : : MHD_RESPMEM_MUST_COPY);
821 : 630 : add_mhd_response_header (r, "Content-Type", "text/plain");
822 : 630 : MHD_RESULT rc = MHD_queue_response (c, code, r);
823 : 630 : MHD_destroy_response (r);
824 : 630 : return rc;
825 : : }
826 : : };
827 : :
828 : :
829 : : struct sqlite_exception: public reportable_exception
830 : : {
831 : 0 : sqlite_exception(int rc, const string& msg):
832 [ # # # # : 0 : reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) {
# # # # ]
833 [ # # # # : 0 : inc_metric("error_count","sqlite3",sqlite3_errstr(rc));
# # # # #
# ]
834 : 0 : }
835 : : };
836 : :
837 : 4 : struct libc_exception: public reportable_exception
838 : : {
839 : 596 : libc_exception(int rc, const string& msg):
840 [ - + + - : 1788 : reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
+ - + - ]
841 [ + - + - : 1192 : inc_metric("error_count","libc",strerror(rc));
+ - + - ]
842 : 596 : }
843 : : };
844 : :
845 : :
846 : : struct archive_exception: public reportable_exception
847 : : {
848 : 0 : archive_exception(const string& msg):
849 [ # # ]: 0 : reportable_exception(string("libarchive error: ") + msg) {
850 [ # # # # : 0 : inc_metric("error_count","libarchive",msg);
# # ]
851 : 0 : }
852 : 0 : archive_exception(struct archive* a, const string& msg):
853 [ # # # # : 0 : reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
# # # # ]
854 [ # # # # : 0 : inc_metric("error_count","libarchive",msg + ": " + string(archive_error_string(a) ?: "?"));
# # # # #
# # # # #
# # ]
855 : 0 : }
856 : 0 : archive_exception(struct archive* a, const string& fname, const string& msg):
857 [ # # # # : 0 : reportable_exception(string("libarchive error: ") + fname + string(" ") + msg + ": " +
# # # # #
# ]
858 [ # # # # ]: 0 : string(archive_error_string(a) ?: "?")) {
859 [ # # # # : 0 : inc_metric("error_count","libarchive",msg + ": " + string(archive_error_string(a) ?: "?"));
# # # # #
# # # # #
# # ]
860 : 0 : }
861 : : };
862 : :
863 : :
864 : : struct elfutils_exception: public reportable_exception
865 : : {
866 : 0 : elfutils_exception(int rc, const string& msg):
867 [ # # # # : 0 : reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
# # # # ]
868 [ # # # # : 0 : inc_metric("error_count","elfutils",elf_errmsg(rc));
# # # # #
# ]
869 : 0 : }
870 : : };
871 : :
872 : :
873 : : ////////////////////////////////////////////////////////////////////////
874 : :
875 : : template <typename Payload>
876 : : class workq
877 : : {
878 : : unordered_set<Payload> q; // eliminate duplicates
879 : : mutex mtx;
880 : : condition_variable cv;
881 : : bool dead;
882 : : unsigned idlers; // number of threads busy with wait_idle / done_idle
883 : : unsigned fronters; // number of threads busy with wait_front / done_front
884 : :
885 : : public:
886 : 84 : workq() { dead = false; idlers = 0; fronters = 0; }
887 : 84 : ~workq() {}
888 : :
889 : 1314 : void push_back(const Payload& p)
890 : : {
891 : 1314 : unique_lock<mutex> lock(mtx);
892 [ + - ]: 1314 : q.insert (p);
893 [ + - + - : 2628 : set_metric("thread_work_pending","role","scan", q.size());
+ - + - ]
894 : 1314 : cv.notify_all();
895 : 1314 : }
896 : :
897 : : // kill this workqueue, wake up all idlers / scanners
898 : 84 : void nuke() {
899 : 84 : unique_lock<mutex> lock(mtx);
900 : : // optional: q.clear();
901 : 84 : dead = true;
902 : 84 : cv.notify_all();
903 : 84 : }
904 : :
905 : : // clear the workqueue, when scanning is interrupted with USR2
906 : 0 : void clear() {
907 : 0 : unique_lock<mutex> lock(mtx);
908 : 0 : q.clear();
909 [ # # # # : 0 : set_metric("thread_work_pending","role","scan", q.size());
# # # # ]
910 : : // NB: there may still be some live fronters
911 : 0 : cv.notify_all(); // maybe wake up waiting idlers
912 : 0 : }
913 : :
914 : : // block this scanner thread until there is work to do and no active idler
915 : 1602 : bool wait_front (Payload& p)
916 : : {
917 : 1602 : unique_lock<mutex> lock(mtx);
918 [ + + + + : 5711 : while (!dead && (q.size() == 0 || idlers > 0))
+ + ]
919 [ + - ]: 4109 : cv.wait(lock);
920 [ + + ]: 1602 : if (dead)
921 : : return false;
922 : : else
923 : : {
924 [ + - ]: 1314 : p = * q.begin();
925 : 1314 : q.erase (q.begin());
926 : 1314 : fronters ++; // prevent idlers from starting awhile, even if empty q
927 [ + - + - : 2628 : set_metric("thread_work_pending","role","scan", q.size());
+ - + - -
- ]
928 : : // NB: don't wake up idlers yet! The consumer is busy
929 : : // processing this element until it calls done_front().
930 : 1314 : return true;
931 : : }
932 : 1602 : }
933 : :
934 : : // notify waitq that scanner thread is done with that last item
935 : 1313 : void done_front ()
936 : : {
937 : 1313 : unique_lock<mutex> lock(mtx);
938 : 1314 : fronters --;
939 [ + + + + ]: 1314 : if (q.size() == 0 && fronters == 0)
940 : 100 : cv.notify_all(); // maybe wake up waiting idlers
941 : 1314 : }
942 : :
943 : : // block this idler thread until there is no work to do
944 : 592 : void wait_idle ()
945 : : {
946 : 592 : unique_lock<mutex> lock(mtx);
947 : 592 : cv.notify_all(); // maybe wake up waiting scanners
948 [ + + + + : 637 : while (!dead && ((q.size() != 0) || fronters > 0))
+ + ]
949 [ + - ]: 45 : cv.wait(lock);
950 [ + - ]: 592 : idlers ++;
951 : 592 : }
952 : :
953 : 510 : void done_idle ()
954 : : {
955 : 510 : unique_lock<mutex> lock(mtx);
956 : 510 : idlers --;
957 : 510 : cv.notify_all(); // maybe wake up waiting scanners, but probably not (shutting down)
958 : 510 : }
959 : : };
960 : :
961 : : typedef struct stat stat_t;
962 : : typedef pair<string,stat_t> scan_payload;
963 : : inline bool operator< (const scan_payload& a, const scan_payload& b)
964 : : {
965 : : return a.first < b.first; // don't bother compare the stat fields
966 : : }
967 : :
968 : : namespace std { // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56480
969 : : template<> struct hash<::scan_payload>
970 : : {
971 : 5342 : std::size_t operator() (const ::scan_payload& p) const noexcept
972 : : {
973 [ + + + + ]: 5342 : return hash<string>()(p.first);
974 : : }
975 : : };
976 : : template<> struct equal_to<::scan_payload>
977 : : {
978 : 563 : std::size_t operator() (const ::scan_payload& a, const ::scan_payload& b) const noexcept
979 : : {
980 [ + - - - ]: 563 : return a.first == b.first;
981 : : }
982 : : };
983 : : }
984 : :
985 : : static workq<scan_payload> scanq; // just a single one
986 : : // producer & idler: thread_main_fts_source_paths()
987 : : // consumer: thread_main_scanner()
988 : : // idler: thread_main_groom()
989 : :
990 : :
991 : : ////////////////////////////////////////////////////////////////////////
992 : :
993 : : // Unique set is a thread-safe structure that lends 'ownership' of a value
994 : : // to a thread. Other threads requesting the same thing are made to wait.
995 : : // It's like a semaphore-on-demand.
996 : : template <typename T>
997 : : class unique_set
998 : : {
999 : : private:
1000 : : set<T> values;
1001 : : mutex mtx;
1002 : : condition_variable cv;
1003 : : public:
1004 : 60 : unique_set() {}
1005 : 60 : ~unique_set() {}
1006 : :
1007 : 2596 : void acquire(const T& value)
1008 : : {
1009 : 2596 : unique_lock<mutex> lock(mtx);
1010 [ + + ]: 6775 : while (values.find(value) != values.end())
1011 [ + - ]: 4179 : cv.wait(lock);
1012 [ + - ]: 2596 : values.insert(value);
1013 : 2596 : }
1014 : :
1015 : 2596 : void release(const T& value)
1016 : : {
1017 : 2596 : unique_lock<mutex> lock(mtx);
1018 : : // assert (values.find(value) != values.end());
1019 : 2596 : values.erase(value);
1020 : 2596 : cv.notify_all();
1021 : 2596 : }
1022 : : };
1023 : :
1024 : :
1025 : : // This is the object that's instantiate to uniquely hold a value in a
1026 : : // RAII-pattern way.
1027 : : template <typename T>
1028 : : class unique_set_reserver
1029 : : {
1030 : : private:
1031 : : unique_set<T>& please_hold;
1032 : : T mine;
1033 : : public:
1034 : 2596 : unique_set_reserver(unique_set<T>& t, const T& value):
1035 [ + - ]: 2596 : please_hold(t), mine(value) { please_hold.acquire(mine); }
1036 : 2596 : ~unique_set_reserver() { please_hold.release(mine); }
1037 : : };
1038 : :
1039 : :
1040 : : ////////////////////////////////////////////////////////////////////////
1041 : :
1042 : : // periodic_barrier is a concurrency control object that lets N threads
1043 : : // periodically (based on counter value) agree to wait at a barrier,
1044 : : // let one of them carry out some work, then be set free
1045 : :
1046 : : class periodic_barrier
1047 : : {
1048 : : private:
1049 : : unsigned period; // number of count() reports to trigger barrier activation
1050 : : unsigned threads; // number of threads participating
1051 : : mutex mtx; // protects all the following fields
1052 : : unsigned counter; // count of count() reports in the current generation
1053 : : unsigned generation; // barrier activation generation
1054 : : unsigned waiting; // number of threads waiting for barrier
1055 : : bool dead; // bring out your
1056 : : condition_variable cv;
1057 : : public:
1058 : 72 : periodic_barrier(unsigned t, unsigned p):
1059 : 72 : period(p), threads(t), counter(0), generation(0), waiting(0), dead(false) { }
1060 : : virtual ~periodic_barrier() {}
1061 : :
1062 : : virtual void periodic_barrier_work() noexcept = 0;
1063 : 72 : void nuke() {
1064 : 72 : unique_lock<mutex> lock(mtx);
1065 : 72 : dead = true;
1066 : 72 : cv.notify_all();
1067 : 72 : }
1068 : :
1069 : 1602 : void count()
1070 : : {
1071 : 1602 : unique_lock<mutex> lock(mtx);
1072 : 1602 : unsigned prev_generation = this->generation;
1073 [ + + ]: 1602 : if (counter < period-1) // normal case: counter just freely running
1074 : : {
1075 : 1458 : counter ++;
1076 : 1458 : return;
1077 : : }
1078 [ + + ]: 144 : else if (counter == period-1) // we're the doer
1079 : : {
1080 : 36 : counter = period; // entering barrier holding phase
1081 : 36 : cv.notify_all();
1082 [ + + + - ]: 174 : while (waiting < threads-1 && !dead)
1083 [ + - ]: 102 : cv.wait(lock);
1084 : : // all other threads are now stuck in the barrier
1085 : 36 : this->periodic_barrier_work(); // NB: we're holding the mutex the whole time
1086 : : // reset for next barrier, releasing other waiters
1087 : 36 : counter = 0;
1088 : 36 : generation ++;
1089 : 36 : cv.notify_all();
1090 : 36 : return;
1091 : : }
1092 [ + - ]: 108 : else if (counter == period) // we're a waiter, in holding phase
1093 : : {
1094 : 108 : waiting ++;
1095 : 108 : cv.notify_all();
1096 [ + + + - : 395 : while (counter == period && generation == prev_generation && !dead)
+ - ]
1097 [ + - ]: 179 : cv.wait(lock);
1098 : 108 : waiting --;
1099 : 108 : return;
1100 : : }
1101 : 1602 : }
1102 : : };
1103 : :
1104 : :
1105 : :
1106 : : ////////////////////////////////////////////////////////////////////////
1107 : :
1108 : :
1109 : : // Print a standard timestamp.
1110 : : static ostream&
1111 : 52752 : timestamp (ostream &o)
1112 : : {
1113 : 52752 : char datebuf[80];
1114 : 52752 : char *now2 = NULL;
1115 : 52752 : time_t now_t = time(NULL);
1116 : 52752 : struct tm now;
1117 : 52752 : struct tm *nowp = gmtime_r (&now_t, &now);
1118 [ + - ]: 52754 : if (nowp)
1119 : : {
1120 : 52754 : (void) strftime (datebuf, sizeof (datebuf), "%c", nowp);
1121 : 52754 : now2 = datebuf;
1122 : : }
1123 : :
1124 : 52754 : return o << "[" << (now2 ? now2 : "") << "] "
1125 [ - + ]: 52754 : << "(" << getpid () << "/" << tid() << "): ";
1126 : : }
1127 : :
1128 : :
1129 : : // A little class that impersonates an ostream to the extent that it can
1130 : : // take << streaming operations. It batches up the bits into an internal
1131 : : // stringstream until it is destroyed; then flushes to the original ostream.
1132 : : // It adds a timestamp
1133 : : class obatched
1134 : : {
1135 : : private:
1136 : : ostream& o;
1137 : : stringstream stro;
1138 : : static mutex lock;
1139 : : public:
1140 : 52752 : obatched(ostream& oo, bool timestamp_p = true): o(oo)
1141 : : {
1142 [ + - ]: 52753 : if (timestamp_p)
1143 [ + - ]: 52753 : timestamp(stro);
1144 : 52753 : }
1145 : 52748 : ~obatched()
1146 : : {
1147 : 52748 : unique_lock<mutex> do_not_cross_the_streams(obatched::lock);
1148 : 52759 : o << stro.str();
1149 : 52759 : o.flush();
1150 : 52758 : }
1151 : : operator ostream& () { return stro; }
1152 [ - - + - : 42123 : template <typename T> ostream& operator << (const T& t) { stro << t; return stro; }
+ - + - +
- + - + -
- - - - -
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - + -
- - + - +
- + - + -
- - + - +
- + - + -
- - + - +
- + - + -
+ - + - +
- - - + -
- - + - -
- + - + -
+ - - - -
- - - - -
- - - - +
- - - + -
+ - - - +
- + - - -
+ - - - +
- - - - -
+ - + - +
- + - + -
+ - + - -
- ]
1153 : : };
1154 : : mutex obatched::lock; // just the one, since cout/cerr iostreams are not thread-safe
1155 : :
1156 : :
1157 : 694 : void reportable_exception::report(ostream& o) const {
1158 [ + - + - ]: 694 : obatched(o) << message << endl;
1159 : 694 : }
1160 : :
1161 : :
1162 : : ////////////////////////////////////////////////////////////////////////
1163 : :
1164 : :
1165 : : // RAII style sqlite prepared-statement holder that matches { } block lifetime
1166 : :
1167 : : struct sqlite_ps
1168 : : {
1169 : : private:
1170 : : sqlite3* db;
1171 : : const string nickname;
1172 : : const string sql;
1173 : : sqlite3_stmt *pp;
1174 : : // for step_timeout()/callback
1175 : : struct timespec ts_start;
1176 : : double ts_timeout;
1177 : :
1178 : : sqlite_ps(const sqlite_ps&); // make uncopyable
1179 : : sqlite_ps& operator=(const sqlite_ps &); // make unassignable
1180 : :
1181 : : public:
1182 [ + - ]: 9850 : sqlite_ps (sqlite3* d, const string& n, const string& s): db(d), nickname(n), sql(s) {
1183 : : // tmp_ms_metric tick("sqlite3","prep",nickname);
1184 [ + + ]: 9847 : if (verbose > 4)
1185 [ + - + - : 174 : obatched(clog) << nickname << " prep " << sql << endl;
+ - + - +
- ]
1186 [ + - ]: 9847 : int rc = sqlite3_prepare_v2 (db, sql.c_str(), -1 /* to \0 */, & this->pp, NULL);
1187 [ - + ]: 9850 : if (rc != SQLITE_OK)
1188 [ # # # # ]: 0 : throw sqlite_exception(rc, "prepare " + sql);
1189 : 19700 : this->reset_timeout(0.0);
1190 : 0 : }
1191 : :
1192 : 206943 : sqlite_ps& reset()
1193 : : {
1194 [ + - + - ]: 413852 : tmp_ms_metric tick("sqlite3","reset",nickname);
1195 [ + - ]: 206908 : sqlite3_reset(this->pp);
1196 : 207040 : return *this;
1197 : 206924 : }
1198 : :
1199 : 237049 : sqlite_ps& bind(int parameter, const string& str)
1200 : : {
1201 [ + + ]: 237049 : if (verbose > 4)
1202 [ + - + - : 196 : obatched(clog) << nickname << " bind " << parameter << "=" << str << endl;
+ - + - +
- + - ]
1203 : 237049 : int rc = sqlite3_bind_text (this->pp, parameter, str.c_str(), -1, SQLITE_TRANSIENT);
1204 [ - + ]: 236924 : if (rc != SQLITE_OK)
1205 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 bind");
1206 : 236924 : return *this;
1207 : : }
1208 : :
1209 : 64181 : sqlite_ps& bind(int parameter, int64_t value)
1210 : : {
1211 [ + + ]: 64181 : if (verbose > 4)
1212 [ + - + - : 64 : obatched(clog) << nickname << " bind " << parameter << "=" << value << endl;
+ - + - +
- + - ]
1213 : 64181 : int rc = sqlite3_bind_int64 (this->pp, parameter, value);
1214 [ - + ]: 64201 : if (rc != SQLITE_OK)
1215 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 bind");
1216 : 64201 : return *this;
1217 : : }
1218 : :
1219 : : sqlite_ps& bind(int parameter)
1220 : : {
1221 : : if (verbose > 4)
1222 : : obatched(clog) << nickname << " bind " << parameter << "=" << "NULL" << endl;
1223 : : int rc = sqlite3_bind_null (this->pp, parameter);
1224 : : if (rc != SQLITE_OK)
1225 : : throw sqlite_exception(rc, "sqlite3 bind");
1226 : : return *this;
1227 : : }
1228 : :
1229 : :
1230 : 127172 : void step_ok_done() {
1231 [ + - + - ]: 254224 : tmp_ms_metric tick("sqlite3","step_done",nickname);
1232 [ + - ]: 127051 : int rc = sqlite3_step (this->pp);
1233 [ + + ]: 127474 : if (verbose > 4)
1234 [ + - + - : 128 : obatched(clog) << nickname << " step-ok-done(" << sqlite3_errstr(rc) << ") " << sql << endl;
+ - + - +
- + - + -
+ - ]
1235 [ + + - + ]: 127474 : if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
1236 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 step");
1237 [ + - ]: 127474 : (void) sqlite3_reset (this->pp);
1238 : 127470 : }
1239 : :
1240 : :
1241 : 42482 : int step() {
1242 [ + - + - ]: 84961 : tmp_ms_metric tick("sqlite3","step",nickname);
1243 [ + - ]: 42479 : int rc = sqlite3_step (this->pp);
1244 [ + + ]: 42486 : if (verbose > 4)
1245 [ + - + - : 62 : obatched(clog) << nickname << " step(" << sqlite3_errstr(rc) << ") " << sql << endl;
+ - + - +
- + - + -
+ - ]
1246 : 42486 : return rc;
1247 : 42486 : }
1248 : :
1249 : :
1250 : 9890 : void reset_timeout(double s) // set starting point for maximum elapsed time in step_timeouts()
1251 : : {
1252 : 9850 : clock_gettime (CLOCK_MONOTONIC, &this->ts_start);
1253 : 9850 : this->ts_timeout = s;
1254 : : }
1255 : :
1256 : :
1257 : 0 : static int sqlite3_progress_handler_cb (void *param)
1258 : : {
1259 : 0 : sqlite_ps *pp = (sqlite_ps*) param;
1260 : 0 : struct timespec ts_end;
1261 : 0 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
1262 : 0 : double deltas = (ts_end.tv_sec - pp->ts_start.tv_sec) + (ts_end.tv_nsec - pp->ts_start.tv_nsec)/1.e9;
1263 [ # # # # ]: 0 : return (interrupted || (deltas > pp->ts_timeout)); // non-zero => interrupt sqlite operation in progress
1264 : : }
1265 : :
1266 : :
1267 : 70 : int step_timeout() {
1268 : : // Do the same thing as step(), except wrapping it into a timeout
1269 : : // relative to the last reset_timeout() invocation.
1270 : : //
1271 : : // Do this by attaching a progress_handler to the database
1272 : : // connection, for the duration of this operation. It should be a
1273 : : // private connection to the calling thread, so other operations
1274 : : // cannot begin concurrently.
1275 : :
1276 : 70 : sqlite3_progress_handler(this->db, 10000 /* bytecode insns */,
1277 : : & sqlite3_progress_handler_cb, (void*) this);
1278 : 70 : int rc = this->step();
1279 : 70 : sqlite3_progress_handler(this->db, 0, 0, 0); // disable
1280 : 70 : struct timespec ts_end;
1281 : 70 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
1282 : 70 : double deltas = (ts_end.tv_sec - this->ts_start.tv_sec) + (ts_end.tv_nsec - this->ts_start.tv_nsec)/1.e9;
1283 [ - + ]: 70 : if (verbose > 3)
1284 [ # # # # : 0 : obatched(clog) << this->nickname << " progress-delta-final " << deltas << endl;
# # # # ]
1285 : 70 : return rc;
1286 : : }
1287 : :
1288 : :
1289 : 9839 : ~sqlite_ps () { sqlite3_finalize (this->pp); }
1290 [ + - + - : 7130 : operator sqlite3_stmt* () { return this->pp; }
+ - + - +
- + - + -
+ - + - +
- ]
1291 : : };
1292 : :
1293 : :
1294 : : ////////////////////////////////////////////////////////////////////////
1295 : :
1296 : :
1297 : : struct sqlite_checkpoint_pb: public periodic_barrier
1298 : : {
1299 : : // NB: don't use sqlite_ps since it can throw exceptions during ctor etc.
1300 : 72 : sqlite_checkpoint_pb(unsigned t, unsigned p):
1301 : 144 : periodic_barrier(t, p) { }
1302 : :
1303 : 36 : void periodic_barrier_work() noexcept
1304 : : {
1305 : 36 : (void) sqlite3_exec (db, "pragma wal_checkpoint(truncate);", NULL, NULL, NULL);
1306 : 36 : }
1307 : : };
1308 : :
1309 : : static periodic_barrier* scan_barrier = 0; // initialized in main()
1310 : :
1311 : :
1312 : : ////////////////////////////////////////////////////////////////////////
1313 : :
1314 : : // RAII style templated autocloser
1315 : :
1316 : : template <class Payload, class Ignore>
1317 : : struct defer_dtor
1318 : : {
1319 : : public:
1320 : : typedef Ignore (*dtor_fn) (Payload);
1321 : :
1322 : : private:
1323 : : Payload p;
1324 : : dtor_fn fn;
1325 : :
1326 : : public:
1327 : 8054 : defer_dtor(Payload _p, dtor_fn _fn): p(_p), fn(_fn) {}
1328 : 426 : ~defer_dtor() { (void) (*fn)(p); }
1329 : :
1330 : : private:
1331 : : defer_dtor(const defer_dtor<Payload,Ignore>&); // make uncopyable
1332 : : defer_dtor& operator=(const defer_dtor<Payload,Ignore> &); // make unassignable
1333 : : };
1334 : :
1335 : :
1336 : :
1337 : : ////////////////////////////////////////////////////////////////////////
1338 : :
1339 : :
1340 : : static string
1341 : 6786 : header_censor(const string& str)
1342 : : {
1343 : 6786 : string y;
1344 [ + + ]: 84105 : for (auto&& x : str)
1345 : : {
1346 [ + + ]: 77319 : if (isalnum(x) || x == '/' || x == '.' || x == ',' || x == '_' || x == ':')
1347 [ + - ]: 154632 : y += x;
1348 : : }
1349 : 6786 : return y;
1350 : 0 : }
1351 : :
1352 : :
1353 : : static string
1354 : 3393 : conninfo (struct MHD_Connection * conn)
1355 : : {
1356 : 3393 : char hostname[256]; // RFC1035
1357 : 3393 : char servname[256];
1358 : 3393 : int sts = -1;
1359 : :
1360 [ - + ]: 3393 : if (conn == 0)
1361 : 0 : return "internal";
1362 : :
1363 : : /* Look up client address data. */
1364 : 3393 : const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
1365 : : MHD_CONNECTION_INFO_CLIENT_ADDRESS);
1366 [ + - ]: 3393 : struct sockaddr *so = u ? u->client_addr : 0;
1367 : :
1368 [ + - - + ]: 3393 : if (so && so->sa_family == AF_INET) {
1369 : 0 : sts = getnameinfo (so, sizeof (struct sockaddr_in),
1370 : : hostname, sizeof (hostname),
1371 : : servname, sizeof (servname),
1372 : : NI_NUMERICHOST | NI_NUMERICSERV);
1373 [ + - ]: 3393 : } else if (so && so->sa_family == AF_INET6) {
1374 : 3393 : struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
1375 [ + - + - : 3393 : if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
+ + ]
1376 : 1631 : struct sockaddr_in addr4;
1377 : 1631 : memset (&addr4, 0, sizeof(addr4));
1378 : 1631 : addr4.sin_family = AF_INET;
1379 : 1631 : addr4.sin_port = addr6->sin6_port;
1380 : 1631 : memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
1381 : 1631 : sts = getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
1382 : : hostname, sizeof (hostname),
1383 : : servname, sizeof (servname),
1384 : : NI_NUMERICHOST | NI_NUMERICSERV);
1385 : : } else {
1386 : 1762 : sts = getnameinfo (so, sizeof (struct sockaddr_in6),
1387 : : hostname, sizeof (hostname),
1388 : : servname, sizeof (servname),
1389 : : NI_NUMERICHOST | NI_NUMERICSERV);
1390 : : }
1391 : : }
1392 : :
1393 [ - + ]: 3393 : if (sts != 0) {
1394 : 0 : hostname[0] = servname[0] = '\0';
1395 : : }
1396 : :
1397 : : // extract headers relevant to administration
1398 [ + + ]: 3393 : const char* user_agent = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
1399 [ + + ]: 3393 : const char* x_forwarded_for = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
1400 : : // NB: these are untrustworthy, beware if machine-processing log files
1401 : :
1402 [ + - + - : 10179 : return string(hostname) + string(":") + string(servname) +
+ - + - +
- ]
1403 [ + - + - : 13572 : string(" UA:") + header_censor(string(user_agent)) +
+ - + - +
- ]
1404 [ + - + - : 10179 : string(" XFF:") + header_censor(string(x_forwarded_for));
+ - + - ]
1405 : : }
1406 : :
1407 : :
1408 : :
1409 : : ////////////////////////////////////////////////////////////////////////
1410 : :
1411 : : /* Wrapper for MHD_add_response_header that logs an error if we
1412 : : couldn't add the specified header. */
1413 : : static void
1414 : 13331 : add_mhd_response_header (struct MHD_Response *r,
1415 : : const char *h, const char *v)
1416 : : {
1417 [ - + ]: 13331 : if (MHD_add_response_header (r, h, v) == MHD_NO)
1418 [ # # # # : 0 : obatched(clog) << "Error: couldn't add '" << h << "' header" << endl;
# # ]
1419 : 13331 : }
1420 : :
1421 : : static void
1422 : 2154 : add_mhd_last_modified (struct MHD_Response *resp, time_t mtime)
1423 : : {
1424 : 2154 : struct tm now;
1425 : 2154 : struct tm *nowp = gmtime_r (&mtime, &now);
1426 [ + - ]: 2154 : if (nowp != NULL)
1427 : : {
1428 : 2154 : char datebuf[80];
1429 : 2154 : size_t rc = strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %T GMT",
1430 : : nowp);
1431 [ + - ]: 2154 : if (rc > 0 && rc < sizeof (datebuf))
1432 : 2154 : add_mhd_response_header (resp, "Last-Modified", datebuf);
1433 : : }
1434 : :
1435 : 2154 : add_mhd_response_header (resp, "Cache-Control", "public");
1436 : 2154 : }
1437 : :
1438 : : // quote all questionable characters of str for safe passage through a sh -c expansion.
1439 : : static string
1440 : 568 : shell_escape(const string& str)
1441 : : {
1442 : 568 : string y;
1443 [ + + ]: 72928 : for (auto&& x : str)
1444 : : {
1445 [ + + + + ]: 72360 : if (! isalnum(x) && x != '/')
1446 [ + - ]: 8966 : y += "\\";
1447 [ + - ]: 144720 : y += x;
1448 : : }
1449 : 568 : return y;
1450 : 0 : }
1451 : :
1452 : :
1453 : : // PR25548: Perform POSIX / RFC3986 style path canonicalization on the input string.
1454 : : //
1455 : : // Namely:
1456 : : // // -> /
1457 : : // /foo/../ -> /
1458 : : // /./ -> /
1459 : : //
1460 : : // This mapping is done on dwarf-side source path names, which may
1461 : : // include these constructs, so we can deal with debuginfod clients
1462 : : // that accidentally canonicalize the paths.
1463 : : //
1464 : : // realpath(3) is close but not quite right, because it also resolves
1465 : : // symbolic links. Symlinks at the debuginfod server have nothing to
1466 : : // do with the build-time symlinks, thus they must not be considered.
1467 : : //
1468 : : // see also curl Curl_dedotdotify() aka RFC3986, which we mostly follow here
1469 : : // see also libc __realpath()
1470 : : // see also llvm llvm::sys::path::remove_dots()
1471 : : static string
1472 : 17736 : canon_pathname (const string& input)
1473 : : {
1474 : 17736 : string i = input; // 5.2.4 (1)
1475 : 17736 : string o;
1476 : :
1477 : 17736 : while (i.size() != 0)
1478 : : {
1479 : : // 5.2.4 (2) A
1480 [ + - - + ]: 152472 : if (i.substr(0,3) == "../")
1481 [ # # ]: 0 : i = i.substr(3);
1482 [ + - - + ]: 152472 : else if(i.substr(0,2) == "./")
1483 [ # # ]: 0 : i = i.substr(2);
1484 : :
1485 : : // 5.2.4 (2) B
1486 [ + - + + ]: 152472 : else if (i.substr(0,3) == "/./")
1487 [ + - ]: 1984 : i = i.substr(2);
1488 [ - + ]: 150488 : else if (i == "/.")
1489 [ # # ]: 0 : i = ""; // no need to handle "/." complete-path-segment case; we're dealing with file names
1490 : :
1491 : : // 5.2.4 (2) C
1492 [ + - + + ]: 150488 : else if (i.substr(0,4) == "/../") {
1493 [ + - ]: 2666 : i = i.substr(3);
1494 : 2666 : string::size_type sl = o.rfind("/");
1495 [ + - ]: 2666 : if (sl != string::npos)
1496 [ + - ]: 2666 : o = o.substr(0, sl);
1497 : : else
1498 [ # # ]: 0 : o = "";
1499 [ - + ]: 147822 : } else if (i == "/..")
1500 [ # # ]: 0 : i = ""; // no need to handle "/.." complete-path-segment case; we're dealing with file names
1501 : :
1502 : : // 5.2.4 (2) D
1503 : : // no need to handle these cases; we're dealing with file names
1504 [ - + ]: 147822 : else if (i == ".")
1505 [ # # ]: 0 : i = "";
1506 [ - + ]: 147822 : else if (i == "..")
1507 [ # # ]: 0 : i = "";
1508 : :
1509 : : // POSIX special: map // to /
1510 [ + - + + ]: 147822 : else if (i.substr(0,2) == "//")
1511 [ + - ]: 128 : i = i.substr(1);
1512 : :
1513 : : // 5.2.4 (2) E
1514 : : else {
1515 [ - + ]: 147694 : string::size_type next_slash = i.find("/", (i[0]=='/' ? 1 : 0)); // skip first slash
1516 [ + - ]: 295388 : o += i.substr(0, next_slash);
1517 [ + + ]: 147694 : if (next_slash == string::npos)
1518 [ + - + + ]: 187944 : i = "";
1519 : : else
1520 [ + - ]: 129958 : i = i.substr(next_slash);
1521 : : }
1522 : : }
1523 : :
1524 : 17736 : return o;
1525 : 17736 : }
1526 : :
1527 : :
1528 : : // Estimate available free space for a given filesystem via statfs(2).
1529 : : // Return true if the free fraction is known to be smaller than the
1530 : : // given minimum percentage. Also update a related metric.
1531 : 4210 : bool statfs_free_enough_p(const string& path, const string& label, long minfree = 0)
1532 : : {
1533 : 4210 : struct statfs sfs;
1534 : 4210 : int rc = statfs(path.c_str(), &sfs);
1535 [ + + ]: 4209 : if (rc == 0)
1536 : : {
1537 : 4135 : double s = (double) sfs.f_bavail / (double) sfs.f_blocks;
1538 [ + - + - ]: 8271 : set_metric("filesys_free_ratio","purpose",label, s);
1539 : 4136 : return ((s * 100.0) < minfree);
1540 : : }
1541 : : return false;
1542 : : }
1543 : :
1544 : :
1545 : :
1546 : : // A map-like class that owns a cache of file descriptors (indexed by
1547 : : // file / content names).
1548 : : //
1549 : : // If only it could use fd's instead of file names ... but we can't
1550 : : // dup(2) to create independent descriptors for the same unlinked
1551 : : // files, so would have to use some goofy linux /proc/self/fd/%d
1552 : : // hack such as the following
1553 : :
1554 : : #if 0
1555 : : int superdup(int fd)
1556 : : {
1557 : : #ifdef __linux__
1558 : : char *fdpath = NULL;
1559 : : int rc = asprintf(& fdpath, "/proc/self/fd/%d", fd);
1560 : : int newfd;
1561 : : if (rc >= 0)
1562 : : newfd = open(fdpath, O_RDONLY);
1563 : : else
1564 : : newfd = -1;
1565 : : free (fdpath);
1566 : : return newfd;
1567 : : #else
1568 : : return -1;
1569 : : #endif
1570 : : }
1571 : : #endif
1572 : :
1573 : : class libarchive_fdcache
1574 : : {
1575 : : private:
1576 : : mutex fdcache_lock;
1577 : :
1578 : : typedef pair<string,string> key; // archive, entry
1579 : 1504 : struct fdcache_entry
1580 : : {
1581 : : string fd; // file name (probably in $TMPDIR), not an actual open fd (EMFILE)
1582 : : double fd_size_mb; // slightly rounded up megabytes
1583 : : time_t freshness; // when was this entry created or requested last
1584 : : unsigned request_count; // how many requests were made; or 0=prefetch only
1585 : : double latency; // how many seconds it took to extract the file
1586 : : };
1587 : :
1588 : : map<key,fdcache_entry> entries; // optimized for lookup
1589 : : time_t last_cleaning;
1590 : : long max_mbs;
1591 : :
1592 : : public:
1593 : 3006 : void set_metrics()
1594 : : {
1595 : 3006 : double fdcache_mb = 0.0;
1596 : 3006 : double prefetch_mb = 0.0;
1597 : 3006 : unsigned fdcache_count = 0;
1598 : 3006 : unsigned prefetch_count = 0;
1599 [ + + ]: 6152 : for (auto &i : entries) {
1600 [ + + ]: 3146 : if (i.second.request_count) {
1601 : 2550 : fdcache_mb += i.second.fd_size_mb;
1602 : 2550 : fdcache_count ++;
1603 : : } else {
1604 : 596 : prefetch_mb += i.second.fd_size_mb;
1605 : 596 : prefetch_count ++;
1606 : : }
1607 : : }
1608 [ + - ]: 3006 : set_metric("fdcache_bytes", fdcache_mb*1024.0*1024.0);
1609 [ + - ]: 3006 : set_metric("fdcache_count", fdcache_count);
1610 [ + - ]: 3006 : set_metric("fdcache_prefetch_bytes", prefetch_mb*1024.0*1024.0);
1611 [ + - ]: 3006 : set_metric("fdcache_prefetch_count", prefetch_count);
1612 : 3006 : }
1613 : :
1614 : 1504 : void intern(const string& a, const string& b, string fd, off_t sz,
1615 : : bool requested_p, double lat)
1616 : : {
1617 : 1504 : {
1618 : 1504 : unique_lock<mutex> lock(fdcache_lock);
1619 : 1504 : time_t now = time(NULL);
1620 : : // there is a chance it's already in here, just wasn't found last time
1621 : : // if so, there's nothing to do but count our luck
1622 [ + - ]: 1504 : auto i = entries.find(make_pair(a,b));
1623 [ - + ]: 1504 : if (i != entries.end())
1624 : : {
1625 [ # # # # : 0 : inc_metric("fdcache_op_count","op","redundant_intern");
# # # # ]
1626 [ # # ]: 0 : if (requested_p) i->second.request_count ++; // repeat prefetch doesn't count
1627 : 0 : i->second.freshness = now;
1628 : : // We need to nuke the temp file, since interning passes
1629 : : // responsibility over the path to this structure. It is
1630 : : // possible that the caller still has an fd open, but that's
1631 : : // OK.
1632 : 0 : unlink (fd.c_str());
1633 : 0 : return;
1634 : : }
1635 : 1504 : double mb = (sz+65535)/1048576.0; // round up to 64K block
1636 : 1504 : fdcache_entry n = { .fd=fd, .fd_size_mb=mb,
1637 : 1504 : .freshness=now, .request_count = requested_p?1U:0U,
1638 [ + - + + ]: 1504 : .latency=lat};
1639 [ + - + - : 1504 : entries.insert(make_pair(make_pair(a,b),n));
+ - ]
1640 : :
1641 [ + + ]: 1504 : if (requested_p)
1642 [ + - + - : 1816 : inc_metric("fdcache_op_count","op","enqueue");
+ - + - ]
1643 : : else
1644 [ + - + - : 1192 : inc_metric("fdcache_op_count","op","prefetch_enqueue");
+ - + - ]
1645 : :
1646 [ + + ]: 1504 : if (verbose > 3)
1647 [ + - + - ]: 4164 : obatched(clog) << "fdcache interned a=" << a << " b=" << b
1648 [ + - + - : 1388 : << " fd=" << fd << " mb=" << mb << " front=" << requested_p
+ - + - +
- + - + -
+ - ]
1649 [ + - + - : 1388 : << " latency=" << lat << endl;
+ - ]
1650 : :
1651 [ + - ]: 1504 : set_metrics();
1652 : 1504 : }
1653 : :
1654 : : // NB: we age the cache at lookup time too
1655 [ + - + + ]: 1504 : if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp))
1656 : : {
1657 [ + - + - : 2836 : inc_metric("fdcache_op_count","op","emerg-flush");
+ - ]
1658 [ + - ]: 2836 : obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl;
1659 : 1418 : this->limit(0); // emergency flush
1660 : : }
1661 : : else // age cache normally
1662 : 86 : this->limit(max_mbs);
1663 : : }
1664 : :
1665 : 1012 : int lookup(const string& a, const string& b)
1666 : : {
1667 : 1012 : int fd = -1;
1668 : 1012 : {
1669 : 1012 : unique_lock<mutex> lock(fdcache_lock);
1670 [ + - ]: 1012 : auto i = entries.find(make_pair(a,b));
1671 [ + + ]: 1012 : if (i != entries.end())
1672 : : {
1673 [ + + ]: 100 : if (i->second.request_count == 0) // was a prefetch!
1674 : : {
1675 [ + - + - ]: 2 : inc_metric("fdcache_prefetch_saved_milliseconds_count");
1676 [ + - + - ]: 4 : add_metric("fdcache_prefetch_saved_milliseconds_sum", i->second.latency*1000.);
1677 : : }
1678 : 100 : i->second.request_count ++;
1679 : 100 : i->second.freshness = time(NULL);
1680 : : // brag about our success
1681 [ + - + - : 200 : inc_metric("fdcache_op_count","op","prefetch_access"); // backward compat
+ - + - ]
1682 [ + - + - ]: 100 : inc_metric("fdcache_saved_milliseconds_count");
1683 [ + - + - ]: 100 : add_metric("fdcache_saved_milliseconds_sum", i->second.latency*1000.);
1684 [ + - ]: 1012 : fd = open(i->second.fd.c_str(), O_RDONLY);
1685 : : }
1686 : 0 : }
1687 : :
1688 [ + + ]: 1012 : if (fd >= 0)
1689 [ + - + - : 200 : inc_metric("fdcache_op_count","op","lookup_hit");
+ - ]
1690 : : else
1691 [ + - + - : 1824 : inc_metric("fdcache_op_count","op","lookup_miss");
+ - ]
1692 : :
1693 : : // NB: no need to age the cache after just a lookup
1694 : :
1695 : 1012 : return fd;
1696 : : }
1697 : :
1698 : 1296 : int probe(const string& a, const string& b) // just a cache residency check - don't modify state, don't open
1699 : : {
1700 : 1296 : unique_lock<mutex> lock(fdcache_lock);
1701 [ + - ]: 1296 : auto i = entries.find(make_pair(a,b));
1702 [ - + ]: 1296 : if (i != entries.end()) {
1703 [ # # # # : 0 : inc_metric("fdcache_op_count","op","probe_hit");
# # # # ]
1704 : 0 : return true;
1705 : : } else {
1706 [ + - + - : 2592 : inc_metric("fdcache_op_count","op","probe_miss");
+ - + - ]
1707 : 1296 : return false;
1708 : : }
1709 : 1296 : }
1710 : :
1711 : 0 : void clear(const string& a, const string& b)
1712 : : {
1713 : 0 : unique_lock<mutex> lock(fdcache_lock);
1714 [ # # ]: 0 : auto i = entries.find(make_pair(a,b));
1715 [ # # ]: 0 : if (i != entries.end()) {
1716 [ # # # # : 0 : inc_metric("fdcache_op_count","op",
# # # # #
# ]
1717 [ # # ]: 0 : i->second.request_count > 0 ? "clear" : "prefetch_clear");
1718 : 0 : unlink (i->second.fd.c_str());
1719 : 0 : entries.erase(i);
1720 [ # # ]: 0 : set_metrics();
1721 : 0 : return;
1722 : : }
1723 : 0 : }
1724 : :
1725 : 1672 : void limit(long maxmbs, bool metrics_p = true)
1726 : : {
1727 : 1672 : time_t now = time(NULL);
1728 : :
1729 : : // avoid overly frequent limit operations
1730 [ + + + + ]: 1672 : if (maxmbs > 0 && (now - this->last_cleaning) < 10) // probably not worth parametrizing
1731 : 86 : return;
1732 : 1586 : this->last_cleaning = now;
1733 : :
1734 [ + + + + ]: 1586 : if (verbose > 3 && (this->max_mbs != maxmbs))
1735 [ + - + - ]: 192 : obatched(clog) << "fdcache limited to maxmbs=" << maxmbs << endl;
1736 : :
1737 : 1586 : unique_lock<mutex> lock(fdcache_lock);
1738 : :
1739 : 1586 : this->max_mbs = maxmbs;
1740 : 1586 : double total_mb = 0.0;
1741 : :
1742 : 1586 : map<double, pair<string,string>> sorted_entries;
1743 [ + + ]: 3090 : for (auto &i: entries)
1744 : : {
1745 : 1504 : total_mb += i.second.fd_size_mb;
1746 : :
1747 : : // need a scalar quantity that combines these inputs in a sensible way:
1748 : : //
1749 : : // 1) freshness of this entry (last time it was accessed)
1750 : : // 2) size of this entry
1751 : : // 3) number of times it has been accessed (or if just prefetched with 0 accesses)
1752 : : // 4) latency it required to extract
1753 : : //
1754 : : // The lower the "score", the earlier garbage collection will
1755 : : // nuke it, so to prioritize entries for preservation, the
1756 : : // score should be higher, and vice versa.
1757 : 1504 : time_t factor_1_freshness = (now - i.second.freshness); // seconds
1758 : 1504 : double factor_2_size = i.second.fd_size_mb; // megabytes
1759 : 1504 : unsigned factor_3_accesscount = i.second.request_count; // units
1760 : 1504 : double factor_4_latency = i.second.latency; // seconds
1761 : :
1762 : : #if 0
1763 : : double score = - factor_1_freshness; // simple LRU
1764 : : #endif
1765 : :
1766 [ + + ]: 1504 : double score = 0.
1767 : 1504 : - log1p(factor_1_freshness) // penalize old file
1768 : 1504 : - log1p(factor_2_size) // penalize large file
1769 : 1504 : + factor_4_latency * factor_3_accesscount; // reward slow + repeatedly read files
1770 : :
1771 [ + + ]: 1504 : if (verbose > 4)
1772 [ + - ]: 8 : obatched(clog) << "fdcache scored score=" << score
1773 [ + - + - ]: 8 : << " a=" << i.first.first << " b=" << i.first.second
1774 [ + - + - : 12 : << " f1=" << factor_1_freshness << " f2=" << factor_2_size
+ - + - +
- + - +
- ]
1775 [ + - + - : 4 : << " f3=" << factor_3_accesscount << " f4=" << factor_4_latency
+ - + - +
- ]
1776 : 4 : << endl;
1777 : :
1778 [ + - + - ]: 4512 : sorted_entries.insert(make_pair(score, i.first));
1779 : : }
1780 : :
1781 : 1586 : unsigned cleaned = 0;
1782 : 1586 : unsigned entries_original = entries.size();
1783 : 1586 : double cleaned_score_min = DBL_MAX;
1784 : 1586 : double cleaned_score_max = DBL_MIN;
1785 : :
1786 : : // drop as many entries[] as needed to bring total mb down to the threshold
1787 [ + + ]: 3090 : for (auto &i: sorted_entries) // in increasing score order!
1788 : : {
1789 [ - + ]: 1504 : if (this->max_mbs > 0 // if this is not a "clear entire table"
1790 [ # # ]: 0 : && total_mb < this->max_mbs) // we've cleared enough to meet threshold
1791 : : break; // stop clearing
1792 : :
1793 [ - + ]: 1504 : auto j = entries.find(i.second);
1794 [ - + ]: 1504 : if (j == entries.end())
1795 : 0 : continue; // should not happen
1796 : :
1797 [ + + ]: 1504 : if (cleaned == 0)
1798 : 1422 : cleaned_score_min = i.first;
1799 : 1504 : cleaned++;
1800 : 1504 : cleaned_score_max = i.first;
1801 : :
1802 [ + + ]: 1504 : if (verbose > 3)
1803 [ + - + - ]: 4164 : obatched(clog) << "fdcache evicted score=" << i.first
1804 [ + - + - ]: 2776 : << " a=" << i.second.first << " b=" << i.second.second
1805 [ + - + - : 4164 : << " fd=" << j->second.fd << " mb=" << j->second.fd_size_mb
+ - + - +
- + - ]
1806 [ + - + - : 1388 : << " rq=" << j->second.request_count << " lat=" << j->second.latency
+ - + - ]
1807 [ + - + - : 1388 : << " fr=" << (now - j->second.freshness)
+ - ]
1808 : 1388 : << endl;
1809 [ + + ]: 1504 : if (metrics_p)
1810 [ + - + - : 2836 : inc_metric("fdcache_op_count","op","evict");
+ - + - ]
1811 : :
1812 : 1504 : total_mb -= j->second.fd_size_mb;
1813 : 1504 : unlink (j->second.fd.c_str());
1814 : 1504 : entries.erase(j);
1815 : : }
1816 : :
1817 [ + + ]: 1586 : if (metrics_p)
1818 [ + - + - : 3004 : inc_metric("fdcache_op_count","op","evict_cycle");
+ - + - ]
1819 : :
1820 [ + - + + ]: 1586 : if (verbose > 1 && cleaned > 0)
1821 : : {
1822 [ + - + - : 4266 : obatched(clog) << "fdcache evicted num=" << cleaned << " of=" << entries_original
+ - + - ]
1823 [ + - + - : 1422 : << " min=" << cleaned_score_min << " max=" << cleaned_score_max
+ - + - +
- ]
1824 : 1422 : << endl;
1825 : : }
1826 : :
1827 [ + + + - ]: 1586 : if (metrics_p) set_metrics();
1828 : 1586 : }
1829 : :
1830 : :
1831 : 84 : ~libarchive_fdcache()
1832 : : {
1833 : : // unlink any fdcache entries in $TMPDIR
1834 : : // don't update metrics; those globals may be already destroyed
1835 : 84 : limit(0, false);
1836 : 84 : }
1837 : : };
1838 : : static libarchive_fdcache fdcache;
1839 : :
1840 : : /* Search ELF_FD for an ELF/DWARF section with name SECTION.
1841 : : If found copy the section to a temporary file and return
1842 : : its file descriptor, otherwise return -1.
1843 : :
1844 : : The temporary file's mtime will be set to PARENT_MTIME.
1845 : : B_SOURCE should be a description of the parent file suitable
1846 : : for printing to the log. */
1847 : :
1848 : : static int
1849 : 12 : extract_section (int elf_fd, int64_t parent_mtime,
1850 : : const string& b_source, const string& section,
1851 : : const timespec& extract_begin)
1852 : : {
1853 : : /* Search the fdcache. */
1854 : 12 : struct stat fs;
1855 : 12 : int fd = fdcache.lookup (b_source, section);
1856 [ - + ]: 12 : if (fd >= 0)
1857 : : {
1858 [ # # ]: 0 : if (fstat (fd, &fs) != 0)
1859 : : {
1860 [ # # ]: 0 : if (verbose)
1861 [ # # ]: 0 : obatched (clog) << "cannot fstate fdcache "
1862 [ # # # # : 0 : << b_source << " " << section << endl;
# # ]
1863 : 0 : close (fd);
1864 : 0 : return -1;
1865 : : }
1866 [ # # ]: 0 : if ((int64_t) fs.st_mtime != parent_mtime)
1867 : : {
1868 [ # # ]: 0 : if (verbose)
1869 [ # # ]: 0 : obatched(clog) << "mtime mismatch for "
1870 [ # # # # : 0 : << b_source << " " << section << endl;
# # ]
1871 : 0 : close (fd);
1872 : 0 : return -1;
1873 : : }
1874 : : /* Success. */
1875 : : return fd;
1876 : : }
1877 : :
1878 : 12 : Elf *elf = elf_begin (elf_fd, ELF_C_READ_MMAP_PRIVATE, NULL);
1879 [ - + ]: 12 : if (elf == NULL)
1880 : : return -1;
1881 : :
1882 : : /* Try to find the section and copy the contents into a separate file. */
1883 : 12 : try
1884 : : {
1885 : 12 : size_t shstrndx;
1886 [ + - ]: 12 : int rc = elf_getshdrstrndx (elf, &shstrndx);
1887 [ - + ]: 12 : if (rc < 0)
1888 [ # # # # ]: 0 : throw elfutils_exception (rc, "getshdrstrndx");
1889 : :
1890 : : Elf_Scn *scn = NULL;
1891 : 424 : while (true)
1892 : : {
1893 [ + - ]: 218 : scn = elf_nextscn (elf, scn);
1894 [ + - ]: 218 : if (scn == NULL)
1895 : : break;
1896 : 218 : GElf_Shdr shdr_storage;
1897 [ + - ]: 218 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
1898 [ + - ]: 218 : if (shdr == NULL)
1899 : : break;
1900 : :
1901 [ + - ]: 218 : const char *scn_name = elf_strptr (elf, shstrndx, shdr->sh_name);
1902 [ + - ]: 218 : if (scn_name == NULL)
1903 : : break;
1904 [ + + ]: 218 : if (scn_name == section)
1905 : : {
1906 : 12 : Elf_Data *data = NULL;
1907 : :
1908 : : /* We found the desired section. */
1909 [ + - ]: 12 : data = elf_rawdata (scn, NULL);
1910 [ - + ]: 12 : if (data == NULL)
1911 [ # # # # : 0 : throw elfutils_exception (elf_errno (), "elfraw_data");
# # ]
1912 [ + + ]: 12 : if (data->d_buf == NULL)
1913 : : {
1914 [ + - + - ]: 8 : obatched(clog) << "section " << section
1915 [ + - + - ]: 4 : << " is empty" << endl;
1916 : 4 : break;
1917 : : }
1918 : :
1919 : : /* Create temporary file containing the section. */
1920 : 8 : char *tmppath = NULL;
1921 : 8 : rc = asprintf (&tmppath, "%s/debuginfod-section.XXXXXX", tmpdir.c_str());
1922 [ - + ]: 8 : if (rc < 0)
1923 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
1924 : 8 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
1925 [ + - ]: 8 : fd = mkstemp (tmppath);
1926 [ - + ]: 8 : if (fd < 0)
1927 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
1928 : :
1929 [ + - ]: 8 : ssize_t res = write_retry (fd, data->d_buf, data->d_size);
1930 [ + - - + ]: 8 : if (res < 0 || (size_t) res != data->d_size) {
1931 [ # # ]: 0 : close (fd);
1932 : 0 : unlink (tmppath);
1933 [ # # # # ]: 0 : throw libc_exception (errno, "cannot write to temporary file");
1934 : : }
1935 : :
1936 : : /* Set mtime to be the same as the parent file's mtime. */
1937 : 8 : struct timespec tvs[2];
1938 [ - + ]: 8 : if (fstat (elf_fd, &fs) != 0) {
1939 [ # # ]: 0 : close (fd);
1940 : 0 : unlink (tmppath);
1941 [ # # # # ]: 0 : throw libc_exception (errno, "cannot fstat file");
1942 : : }
1943 : :
1944 : 8 : tvs[0].tv_sec = 0;
1945 : 8 : tvs[0].tv_nsec = UTIME_OMIT;
1946 : 8 : tvs[1] = fs.st_mtim;
1947 : 8 : (void) futimens (fd, tvs);
1948 : :
1949 : 8 : struct timespec extract_end;
1950 : 8 : clock_gettime (CLOCK_MONOTONIC, &extract_end);
1951 : 8 : double extract_time = (extract_end.tv_sec - extract_begin.tv_sec)
1952 : 8 : + (extract_end.tv_nsec - extract_begin.tv_nsec)/1.e9;
1953 : :
1954 : : /* Add to fdcache. */
1955 [ + - + - ]: 8 : fdcache.intern (b_source, section, tmppath, data->d_size, true, extract_time);
1956 : 8 : break;
1957 : 8 : }
1958 : 206 : }
1959 : : }
1960 [ - - ]: 0 : catch (const reportable_exception &e)
1961 : : {
1962 [ - - ]: 0 : e.report (clog);
1963 [ - - ]: 0 : close (fd);
1964 : 0 : fd = -1;
1965 : 0 : }
1966 : :
1967 : 12 : elf_end (elf);
1968 : : return fd;
1969 : : }
1970 : :
1971 : : static struct MHD_Response*
1972 : 1154 : handle_buildid_f_match (bool internal_req_t,
1973 : : int64_t b_mtime,
1974 : : const string& b_source0,
1975 : : const string& section,
1976 : : int *result_fd)
1977 : : {
1978 : 1154 : (void) internal_req_t; // ignored
1979 : :
1980 : 1154 : struct timespec extract_begin;
1981 : 1154 : clock_gettime (CLOCK_MONOTONIC, &extract_begin);
1982 : :
1983 : 1154 : int fd = open(b_source0.c_str(), O_RDONLY);
1984 [ - + ]: 1154 : if (fd < 0)
1985 [ # # # # : 0 : throw libc_exception (errno, string("open ") + b_source0);
# # ]
1986 : :
1987 : : // NB: use manual close(2) in error case instead of defer_dtor, because
1988 : : // in the normal case, we want to hand the fd over to libmicrohttpd for
1989 : : // file transfer.
1990 : :
1991 : 1154 : struct stat s;
1992 : 1154 : int rc = fstat(fd, &s);
1993 [ - + ]: 1154 : if (rc < 0)
1994 : : {
1995 : 0 : close(fd);
1996 [ # # # # : 0 : throw libc_exception (errno, string("fstat ") + b_source0);
# # ]
1997 : : }
1998 : :
1999 [ - + ]: 1154 : if ((int64_t) s.st_mtime != b_mtime)
2000 : : {
2001 [ # # ]: 0 : if (verbose)
2002 [ # # # # ]: 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
2003 : 0 : close(fd);
2004 : 0 : return 0;
2005 : : }
2006 : :
2007 [ + + ]: 1154 : if (!section.empty ())
2008 : : {
2009 : 6 : int scn_fd = extract_section (fd, s.st_mtime, b_source0, section, extract_begin);
2010 : 6 : close (fd);
2011 : :
2012 [ + + ]: 6 : if (scn_fd >= 0)
2013 : 4 : fd = scn_fd;
2014 : : else
2015 : : {
2016 [ + - ]: 2 : if (verbose)
2017 [ + - ]: 6 : obatched (clog) << "cannot find section " << section
2018 [ + - + - : 2 : << " for " << b_source0 << endl;
+ - ]
2019 : 2 : return 0;
2020 : : }
2021 : :
2022 : 4 : rc = fstat(fd, &s);
2023 [ - + ]: 4 : if (rc < 0)
2024 : : {
2025 : 0 : close (fd);
2026 [ # # # # ]: 0 : throw libc_exception (errno, string ("fstat ") + b_source0
2027 [ # # # # : 0 : + string (" ") + section);
# # # # ]
2028 : : }
2029 : : }
2030 : :
2031 : 1152 : struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
2032 [ + - + - : 2304 : inc_metric ("http_responses_total","result","file");
+ - ]
2033 [ - + ]: 1152 : if (r == 0)
2034 : : {
2035 [ # # ]: 0 : if (verbose)
2036 [ # # ]: 0 : obatched(clog) << "cannot create fd-response for " << b_source0
2037 [ # # # # : 0 : << " section=" << section << endl;
# # ]
2038 : 0 : close(fd);
2039 : : }
2040 : : else
2041 : : {
2042 : 1152 : add_mhd_response_header (r, "Content-Type", "application/octet-stream");
2043 [ + - ]: 1152 : add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
2044 : 1152 : to_string(s.st_size).c_str());
2045 : 1152 : add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source0.c_str());
2046 : 1152 : add_mhd_last_modified (r, s.st_mtime);
2047 [ + - ]: 1152 : if (verbose > 1)
2048 [ + - + - : 2304 : obatched(clog) << "serving file " << b_source0 << " section=" << section << endl;
+ - + - ]
2049 : : /* libmicrohttpd will close it. */
2050 [ - + ]: 1152 : if (result_fd)
2051 : 1152 : *result_fd = fd;
2052 : : }
2053 : :
2054 : : return r;
2055 : : }
2056 : :
2057 : :
2058 : : #ifdef USE_LZMA
2059 : : struct lzma_exception: public reportable_exception
2060 : : {
2061 : 0 : lzma_exception(int rc, const string& msg):
2062 : : // liblzma doesn't have a lzma_ret -> string conversion function, so just
2063 : : // report the value.
2064 [ # # # # : 0 : reportable_exception(string ("lzma error: ") + msg + ": error " + to_string(rc)) {
# # ]
2065 [ # # # # : 0 : inc_metric("error_count","lzma",to_string(rc));
# # ]
2066 : 0 : }
2067 : : };
2068 : :
2069 : : // Neither RPM nor deb files support seeking to a specific file in the package.
2070 : : // Instead, to extract a specific file, we normally need to read the archive
2071 : : // sequentially until we find the file. This is very slow for files at the end
2072 : : // of a large package with lots of files, like kernel debuginfo.
2073 : : //
2074 : : // However, if the compression format used in the archive supports seeking, we
2075 : : // can accelerate this. As of July 2024, xz is the only widely-used format that
2076 : : // supports seeking, and usually only in multi-threaded mode. Luckily, the
2077 : : // kernel-debuginfo package in Fedora and its downstreams, and the
2078 : : // linux-image-*-dbg package in Debian and its downstreams, all happen to use
2079 : : // this.
2080 : : //
2081 : : // The xz format [1] ends with an index of independently compressed blocks in
2082 : : // the stream. In RPM and deb files, the xz stream is the last thing in the
2083 : : // file, so we assume that the xz Stream Footer is at the end of the package
2084 : : // file and do everything relative to that. For each file in the archive, we
2085 : : // remember the size and offset of the file data in the uncompressed xz stream,
2086 : : // then we use the index to seek to that offset when we need that file.
2087 : : //
2088 : : // 1: https://xz.tukaani.org/format/xz-file-format.txt
2089 : :
2090 : : // Return whether an archive supports seeking.
2091 : : static bool
2092 : 1092 : is_seekable_archive (const string& rps, struct archive* a)
2093 : : {
2094 : : // Only xz supports seeking.
2095 [ + + ]: 1092 : if (archive_filter_code (a, 0) != ARCHIVE_FILTER_XZ)
2096 : : return false;
2097 : :
2098 : 390 : int fd = open (rps.c_str(), O_RDONLY);
2099 [ - + ]: 390 : if (fd < 0)
2100 : : return false;
2101 : 390 : defer_dtor<int,int> fd_closer (fd, close);
2102 : :
2103 : : // Seek to the xz Stream Footer. We assume that it's the last thing in the
2104 : : // file, which is true for RPM and deb files.
2105 : 390 : off_t footer_pos = -LZMA_STREAM_HEADER_SIZE;
2106 [ - + ]: 390 : if (lseek (fd, footer_pos, SEEK_END) == -1)
2107 : : return false;
2108 : :
2109 : : // Decode the Stream Footer.
2110 : : uint8_t footer[LZMA_STREAM_HEADER_SIZE];
2111 : : size_t footer_read = 0;
2112 [ + + ]: 780 : while (footer_read < sizeof (footer))
2113 : : {
2114 [ - + ]: 390 : ssize_t bytes_read = read (fd, footer + footer_read,
2115 : : sizeof (footer) - footer_read);
2116 [ - + ]: 390 : if (bytes_read < 0)
2117 : : {
2118 [ # # ]: 0 : if (errno == EINTR)
2119 : 0 : continue;
2120 : : return false;
2121 : : }
2122 [ - + ]: 390 : if (bytes_read == 0)
2123 : : return false;
2124 : 390 : footer_read += bytes_read;
2125 : : }
2126 : :
2127 : 390 : lzma_stream_flags stream_flags;
2128 : 390 : lzma_ret ret = lzma_stream_footer_decode (&stream_flags, footer);
2129 [ - + ]: 390 : if (ret != LZMA_OK)
2130 : : return false;
2131 : :
2132 : : // Seek to the xz Index.
2133 [ - + ]: 390 : if (lseek (fd, footer_pos - stream_flags.backward_size, SEEK_END) == -1)
2134 : : return false;
2135 : :
2136 : : // Decode the Number of Records in the Index. liblzma doesn't have an API for
2137 : : // this if you don't want to decode the whole Index, so we have to do it
2138 : : // ourselves.
2139 : : //
2140 : : // We need 1 byte for the Index Indicator plus 1-9 bytes for the
2141 : : // variable-length integer Number of Records.
2142 : : uint8_t index[10];
2143 : : size_t index_read = 0;
2144 [ + + ]: 780 : while (index_read == 0) {
2145 [ + - ]: 390 : ssize_t bytes_read = read (fd, index, sizeof (index));
2146 [ - + ]: 390 : if (bytes_read < 0)
2147 : : {
2148 [ # # ]: 0 : if (errno == EINTR)
2149 : 0 : continue;
2150 : : return false;
2151 : : }
2152 [ - + ]: 390 : if (bytes_read == 0)
2153 : : return false;
2154 : 390 : index_read += bytes_read;
2155 : : }
2156 : : // The Index Indicator must be 0.
2157 [ - + ]: 390 : if (index[0] != 0)
2158 : : return false;
2159 : :
2160 : 390 : lzma_vli num_records;
2161 : 390 : size_t pos = 0;
2162 : 390 : size_t in_pos = 1;
2163 : 390 : while (true)
2164 : : {
2165 [ - + ]: 390 : if (in_pos >= index_read)
2166 : : {
2167 [ # # ]: 0 : ssize_t bytes_read = read (fd, index, sizeof (index));
2168 [ # # ]: 0 : if (bytes_read < 0)
2169 : : {
2170 [ # # ]: 0 : if (errno == EINTR)
2171 : 0 : continue;
2172 : : return false;
2173 : : }
2174 [ # # ]: 0 : if (bytes_read == 0)
2175 : : return false;
2176 : 0 : index_read = bytes_read;
2177 : 0 : in_pos = 0;
2178 : : }
2179 : 390 : ret = lzma_vli_decode (&num_records, &pos, index, &in_pos, index_read);
2180 [ - + ]: 390 : if (ret == LZMA_STREAM_END)
2181 : : break;
2182 [ # # ]: 0 : else if (ret != LZMA_OK)
2183 : : return false;
2184 : : }
2185 : :
2186 [ + + ]: 390 : if (verbose > 3)
2187 [ + - + - : 676 : obatched(clog) << rps << " has " << num_records << " xz Blocks" << endl;
+ - + - +
- ]
2188 : :
2189 : : // The file is only seekable if it has more than one Block.
2190 : 390 : return num_records > 1;
2191 : 390 : }
2192 : :
2193 : : // Read the Index at the end of an xz file.
2194 : : static lzma_index*
2195 : 202 : read_xz_index (int fd)
2196 : : {
2197 : 202 : off_t footer_pos = -LZMA_STREAM_HEADER_SIZE;
2198 [ - + ]: 202 : if (lseek (fd, footer_pos, SEEK_END) == -1)
2199 [ # # # # ]: 0 : throw libc_exception (errno, "lseek");
2200 : :
2201 : : uint8_t footer[LZMA_STREAM_HEADER_SIZE];
2202 : : size_t footer_read = 0;
2203 [ + + ]: 404 : while (footer_read < sizeof (footer))
2204 : : {
2205 [ - + ]: 202 : ssize_t bytes_read = read (fd, footer + footer_read,
2206 : : sizeof (footer) - footer_read);
2207 [ - + ]: 202 : if (bytes_read < 0)
2208 : : {
2209 [ # # ]: 0 : if (errno == EINTR)
2210 : 0 : continue;
2211 [ # # # # ]: 0 : throw libc_exception (errno, "read");
2212 : : }
2213 [ - + ]: 202 : if (bytes_read == 0)
2214 [ # # ]: 0 : throw reportable_exception ("truncated file");
2215 : 202 : footer_read += bytes_read;
2216 : : }
2217 : :
2218 : 202 : lzma_stream_flags stream_flags;
2219 : 202 : lzma_ret ret = lzma_stream_footer_decode (&stream_flags, footer);
2220 [ - + ]: 202 : if (ret != LZMA_OK)
2221 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_stream_footer_decode");
2222 : :
2223 [ - + ]: 202 : if (lseek (fd, footer_pos - stream_flags.backward_size, SEEK_END) == -1)
2224 [ # # # # ]: 0 : throw libc_exception (errno, "lseek");
2225 : :
2226 : 202 : lzma_stream strm = LZMA_STREAM_INIT;
2227 : 202 : lzma_index* index = NULL;
2228 : 202 : ret = lzma_index_decoder (&strm, &index, UINT64_MAX);
2229 [ - + ]: 202 : if (ret != LZMA_OK)
2230 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_index_decoder");
2231 : 202 : defer_dtor<lzma_stream*,void> strm_ender (&strm, lzma_end);
2232 : :
2233 : 202 : uint8_t in_buf[4096];
2234 : 202 : while (true)
2235 : : {
2236 [ + - ]: 202 : if (strm.avail_in == 0)
2237 : : {
2238 [ + - ]: 202 : ssize_t bytes_read = read (fd, in_buf, sizeof (in_buf));
2239 [ - + ]: 202 : if (bytes_read < 0)
2240 : : {
2241 [ # # ]: 0 : if (errno == EINTR)
2242 : 0 : continue;
2243 [ # # # # ]: 0 : throw libc_exception (errno, "read");
2244 : : }
2245 [ - + ]: 202 : if (bytes_read == 0)
2246 [ # # ]: 0 : throw reportable_exception ("truncated file");
2247 : 202 : strm.avail_in = bytes_read;
2248 : 202 : strm.next_in = in_buf;
2249 : : }
2250 : :
2251 : 202 : ret = lzma_code (&strm, LZMA_RUN);
2252 [ - + ]: 202 : if (ret == LZMA_STREAM_END)
2253 : : break;
2254 [ # # ]: 0 : else if (ret != LZMA_OK)
2255 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_code index");
2256 : : }
2257 : :
2258 : 202 : ret = lzma_index_stream_flags (index, &stream_flags);
2259 [ - + ]: 202 : if (ret != LZMA_OK)
2260 : : {
2261 : 0 : lzma_index_end (index, NULL);
2262 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_index_stream_flags");
2263 : : }
2264 : 202 : return index;
2265 : 202 : }
2266 : :
2267 : : static void
2268 : 202 : my_lzma_index_end (lzma_index* index)
2269 : : {
2270 : 202 : lzma_index_end (index, NULL);
2271 : 202 : }
2272 : :
2273 : : static void
2274 : 210 : free_lzma_block_filter_options (lzma_block* block)
2275 : : {
2276 [ + + ]: 1050 : for (int i = 0; i < LZMA_FILTERS_MAX; i++)
2277 : : {
2278 : 840 : free (block->filters[i].options);
2279 : 840 : block->filters[i].options = NULL;
2280 : : }
2281 : 210 : }
2282 : :
2283 : : static void
2284 : 202 : free_lzma_block_filters (lzma_block* block)
2285 : : {
2286 [ + - ]: 202 : if (block->filters != NULL)
2287 : : {
2288 : 202 : free_lzma_block_filter_options (block);
2289 : 202 : free (block->filters);
2290 : : }
2291 : 202 : }
2292 : :
2293 : : static void
2294 : 202 : extract_xz_blocks_into_fd (const string& srcpath,
2295 : : int src,
2296 : : int dst,
2297 : : lzma_index_iter* iter,
2298 : : uint64_t offset,
2299 : : uint64_t size)
2300 : : {
2301 : : // Seek to the Block. Seeking from the end using the compressed size from the
2302 : : // footer means we don't need to know where the xz stream starts in the
2303 : : // archive.
2304 [ - + ]: 202 : if (lseek (src,
2305 : 202 : (off_t) iter->block.compressed_stream_offset
2306 : 202 : - (off_t) iter->stream.compressed_size,
2307 : : SEEK_END) == -1)
2308 [ # # # # ]: 0 : throw libc_exception (errno, "lseek");
2309 : :
2310 : 202 : offset -= iter->block.uncompressed_file_offset;
2311 : :
2312 : 202 : lzma_block block{};
2313 : 202 : block.filters = (lzma_filter*) calloc (LZMA_FILTERS_MAX + 1,
2314 : : sizeof (lzma_filter));
2315 [ - + ]: 202 : if (block.filters == NULL)
2316 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate lzma_block filters");
2317 : 202 : defer_dtor<lzma_block*,void> filters_freer (&block, free_lzma_block_filters);
2318 : :
2319 : 202 : uint8_t in_buf[4096];
2320 : 202 : uint8_t out_buf[4096];
2321 : 202 : size_t header_read = 0;
2322 : 202 : bool need_log_extracting = verbose > 3;
2323 : 8 : while (true)
2324 : : {
2325 : : // The first byte of the Block is the encoded Block Header Size. Read the
2326 : : // first byte and whatever extra fits in the buffer.
2327 [ + + ]: 412 : while (header_read == 0)
2328 : : {
2329 [ + - ]: 202 : ssize_t bytes_read = read (src, in_buf, sizeof (in_buf));
2330 [ - + ]: 202 : if (bytes_read < 0)
2331 : : {
2332 [ # # ]: 0 : if (errno == EINTR)
2333 : 0 : continue;
2334 [ # # # # ]: 0 : throw libc_exception (errno, "read");
2335 : : }
2336 [ - + ]: 202 : if (bytes_read == 0)
2337 [ # # ]: 0 : throw reportable_exception ("truncated file");
2338 : 202 : header_read += bytes_read;
2339 : : }
2340 : :
2341 : 210 : block.header_size = lzma_block_header_size_decode (in_buf[0]);
2342 : :
2343 : : // If we didn't buffer the whole Block Header earlier, get the rest.
2344 : 210 : eu_static_assert (sizeof (in_buf)
2345 : : >= lzma_block_header_size_decode (UINT8_MAX));
2346 [ - + ]: 210 : while (header_read < block.header_size)
2347 : : {
2348 [ # # ]: 0 : ssize_t bytes_read = read (src, in_buf + header_read,
2349 : : sizeof (in_buf) - header_read);
2350 [ # # ]: 0 : if (bytes_read < 0)
2351 : : {
2352 [ # # ]: 0 : if (errno == EINTR)
2353 : 0 : continue;
2354 [ # # # # ]: 0 : throw libc_exception (errno, "read");
2355 : : }
2356 [ # # ]: 0 : if (bytes_read == 0)
2357 [ # # ]: 0 : throw reportable_exception ("truncated file");
2358 : 0 : header_read += bytes_read;
2359 : : }
2360 : :
2361 : : // Decode the Block Header.
2362 : 210 : block.check = iter->stream.flags->check;
2363 : 210 : lzma_ret ret = lzma_block_header_decode (&block, NULL, in_buf);
2364 [ - + ]: 210 : if (ret != LZMA_OK)
2365 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_block_header_decode");
2366 : 210 : ret = lzma_block_compressed_size (&block, iter->block.unpadded_size);
2367 [ - + ]: 210 : if (ret != LZMA_OK)
2368 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_block_compressed_size");
2369 : :
2370 : : // Start decoding the Block data.
2371 : 210 : lzma_stream strm = LZMA_STREAM_INIT;
2372 : 210 : ret = lzma_block_decoder (&strm, &block);
2373 [ - + ]: 210 : if (ret != LZMA_OK)
2374 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_block_decoder");
2375 : 210 : defer_dtor<lzma_stream*,void> strm_ender (&strm, lzma_end);
2376 : :
2377 : : // We might still have some input buffered from when we read the header.
2378 : 210 : strm.avail_in = header_read - block.header_size;
2379 : 210 : strm.next_in = in_buf + block.header_size;
2380 : 210 : strm.avail_out = sizeof (out_buf);
2381 : 210 : strm.next_out = out_buf;
2382 : 11734 : while (true)
2383 : : {
2384 [ + + ]: 11734 : if (strm.avail_in == 0)
2385 : : {
2386 [ + - ]: 14 : ssize_t bytes_read = read (src, in_buf, sizeof (in_buf));
2387 [ - + ]: 14 : if (bytes_read < 0)
2388 : : {
2389 [ # # ]: 0 : if (errno == EINTR)
2390 : 0 : continue;
2391 [ # # # # ]: 0 : throw libc_exception (errno, "read");
2392 : : }
2393 [ - + ]: 14 : if (bytes_read == 0)
2394 [ # # ]: 0 : throw reportable_exception ("truncated file");
2395 : 14 : strm.avail_in = bytes_read;
2396 : 14 : strm.next_in = in_buf;
2397 : : }
2398 : :
2399 : 11734 : ret = lzma_code (&strm, LZMA_RUN);
2400 [ - + ]: 11734 : if (ret != LZMA_OK && ret != LZMA_STREAM_END)
2401 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_code block");
2402 : :
2403 : : // Throw away anything we decode until we reach the offset, then
2404 : : // start writing to the destination.
2405 [ + + ]: 11734 : if (strm.total_out > offset)
2406 : : {
2407 : 3006 : size_t bytes_to_write = strm.next_out - out_buf;
2408 : 3006 : uint8_t* buf_to_write = out_buf;
2409 : :
2410 : : // Ignore anything in the buffer before the offset.
2411 [ + + ]: 3006 : if (bytes_to_write > strm.total_out - offset)
2412 : : {
2413 : 196 : buf_to_write += bytes_to_write - (strm.total_out - offset);
2414 : 196 : bytes_to_write = strm.total_out - offset;
2415 : : }
2416 : :
2417 : : // Ignore anything after the size.
2418 [ + + ]: 3006 : if (strm.total_out - offset >= size)
2419 : 202 : bytes_to_write -= strm.total_out - offset - size;
2420 : :
2421 [ + + ]: 3006 : if (need_log_extracting)
2422 : : {
2423 [ + - + - ]: 240 : obatched(clog) << "extracting from xz archive " << srcpath
2424 [ + - + - : 120 : << " size=" << size << endl;
+ - ]
2425 : 120 : need_log_extracting = false;
2426 : : }
2427 : :
2428 [ + + ]: 6012 : while (bytes_to_write > 0)
2429 : : {
2430 [ + - ]: 3006 : ssize_t written = write (dst, buf_to_write, bytes_to_write);
2431 [ - + ]: 3006 : if (written < 0)
2432 : : {
2433 [ # # ]: 0 : if (errno == EAGAIN)
2434 : 0 : continue;
2435 [ # # # # ]: 0 : throw libc_exception (errno, "write");
2436 : : }
2437 : 3006 : bytes_to_write -= written;
2438 : 3006 : buf_to_write += written;
2439 : : }
2440 : :
2441 : : // If we reached the size, we're done.
2442 [ + + ]: 3006 : if (strm.total_out - offset >= size)
2443 : 202 : return;
2444 : : }
2445 : :
2446 : 11532 : strm.avail_out = sizeof (out_buf);
2447 : 11532 : strm.next_out = out_buf;
2448 : :
2449 [ + + ]: 11532 : if (ret == LZMA_STREAM_END)
2450 : : break;
2451 : : }
2452 : :
2453 : : // This Block didn't have enough data. Go to the next one.
2454 [ - + ]: 8 : if (lzma_index_iter_next (iter, LZMA_INDEX_ITER_BLOCK))
2455 [ # # ]: 0 : throw reportable_exception ("no more blocks");
2456 [ + - ]: 8 : if (strm.total_out > offset)
2457 : 8 : size -= strm.total_out - offset;
2458 : 8 : offset = 0;
2459 : : // If we had any buffered input left, move it to the beginning of the
2460 : : // buffer to decode the next Block Header.
2461 [ + - ]: 8 : if (strm.avail_in > 0)
2462 : : {
2463 : 8 : memmove (in_buf, strm.next_in, strm.avail_in);
2464 : 8 : header_read = strm.avail_in;
2465 : : }
2466 : : else
2467 : : header_read = 0;
2468 : 8 : free_lzma_block_filter_options (&block);
2469 : 210 : }
2470 : 202 : }
2471 : :
2472 : : static int
2473 : 202 : extract_from_seekable_archive (const string& srcpath,
2474 : : char* tmppath,
2475 : : uint64_t offset,
2476 : : uint64_t size)
2477 : : {
2478 [ + - + - : 404 : inc_metric ("seekable_archive_extraction_attempts","type","xz");
+ - ]
2479 : 202 : try
2480 : : {
2481 [ + - ]: 202 : int src = open (srcpath.c_str(), O_RDONLY);
2482 [ - + ]: 202 : if (src < 0)
2483 [ # # # # : 0 : throw libc_exception (errno, string("open ") + srcpath);
# # ]
2484 : 202 : defer_dtor<int,int> src_closer (src, close);
2485 : :
2486 [ + - ]: 202 : lzma_index* index = read_xz_index (src);
2487 : 202 : defer_dtor<lzma_index*,void> index_ender (index, my_lzma_index_end);
2488 : :
2489 : : // Find the Block containing the offset.
2490 : 202 : lzma_index_iter iter;
2491 : 202 : lzma_index_iter_init (&iter, index);
2492 [ - + ]: 202 : if (lzma_index_iter_locate (&iter, offset))
2493 [ # # ]: 0 : throw reportable_exception ("offset not found");
2494 : :
2495 [ + + ]: 202 : if (verbose > 3)
2496 [ + - + - ]: 360 : obatched(clog) << "seeking in xz archive " << srcpath
2497 [ + - + - : 120 : << " offset=" << offset << " block_offset="
+ - ]
2498 [ + - + - ]: 120 : << iter.block.uncompressed_file_offset << endl;
2499 : :
2500 [ + - ]: 202 : int dst = mkstemp (tmppath);
2501 [ - + ]: 202 : if (dst < 0)
2502 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
2503 : :
2504 : 202 : try
2505 : : {
2506 [ + - ]: 202 : extract_xz_blocks_into_fd (srcpath, src, dst, &iter, offset, size);
2507 : : }
2508 : 0 : catch (...)
2509 : : {
2510 : 0 : unlink (tmppath);
2511 [ - - ]: 0 : close (dst);
2512 : 0 : throw;
2513 : 0 : }
2514 : :
2515 [ + - + - : 404 : inc_metric ("seekable_archive_extraction_successes","type","xz");
+ - + - ]
2516 : 202 : return dst;
2517 : 202 : }
2518 [ - - ]: 0 : catch (const reportable_exception &e)
2519 : : {
2520 [ - - - - : 0 : inc_metric ("seekable_archive_extraction_failures","type","xz");
- - - - ]
2521 [ - - ]: 0 : if (verbose)
2522 [ - - - - ]: 0 : obatched(clog) << "failed to extract from seekable xz archive "
2523 [ - - - - : 0 : << srcpath << ": " << e.message << endl;
- - ]
2524 : 0 : return -1;
2525 : 0 : }
2526 : : }
2527 : : #else
2528 : : static bool
2529 : : is_seekable_archive (const string& rps __attribute__ ((unused)),
2530 : : struct archive* a __attribute__ ((unused)))
2531 : : {
2532 : : return false;
2533 : : }
2534 : : static int
2535 : : extract_from_seekable_archive (const string& srcpath __attribute__ ((unused)),
2536 : : char* tmppath __attribute__ ((unused)),
2537 : : uint64_t offset __attribute__ ((unused)),
2538 : : uint64_t size __attribute__ ((unused)))
2539 : : {
2540 : : return -1;
2541 : : }
2542 : : #endif
2543 : :
2544 : :
2545 : : // For security/portability reasons, many distro-package archives have
2546 : : // a "./" in front of path names; others have nothing, others have
2547 : : // "/". Canonicalize them all to a single leading "/", with the
2548 : : // assumption that this matches the dwarf-derived file names too.
2549 : 3966 : string canonicalized_archive_entry_pathname(struct archive_entry *e)
2550 : : {
2551 : 3966 : string fn = archive_entry_pathname(e);
2552 [ - + ]: 3966 : if (fn.size() == 0)
2553 : 0 : return fn;
2554 [ - + ]: 3966 : if (fn[0] == '/')
2555 : 0 : return fn;
2556 [ + + ]: 3966 : if (fn[0] == '.')
2557 [ + - ]: 2726 : return fn.substr(1);
2558 : : else
2559 [ + - + - ]: 2480 : return string("/")+fn;
2560 : 3966 : }
2561 : :
2562 : :
2563 : : // NB: takes ownership of, and may reassign, fd.
2564 : : static struct MHD_Response*
2565 : 1000 : create_buildid_r_response (int64_t b_mtime0,
2566 : : const string& b_source0,
2567 : : const string& b_source1,
2568 : : const string& section,
2569 : : const string& ima_sig,
2570 : : const char* tmppath,
2571 : : int& fd,
2572 : : off_t size,
2573 : : time_t mtime,
2574 : : const string& metric,
2575 : : const struct timespec& extract_begin)
2576 : : {
2577 [ + + ]: 1000 : if (tmppath != NULL)
2578 : : {
2579 : 900 : struct timespec extract_end;
2580 : 900 : clock_gettime (CLOCK_MONOTONIC, &extract_end);
2581 : 900 : double extract_time = (extract_end.tv_sec - extract_begin.tv_sec)
2582 : 900 : + (extract_end.tv_nsec - extract_begin.tv_nsec)/1.e9;
2583 [ + - ]: 1800 : fdcache.intern(b_source0, b_source1, tmppath, size, true, extract_time);
2584 : : }
2585 : :
2586 [ + + ]: 1000 : if (!section.empty ())
2587 : : {
2588 [ + - ]: 6 : int scn_fd = extract_section (fd, b_mtime0,
2589 [ + - ]: 12 : b_source0 + ":" + b_source1,
2590 : : section, extract_begin);
2591 : 6 : close (fd);
2592 [ + + ]: 6 : if (scn_fd >= 0)
2593 : 4 : fd = scn_fd;
2594 : : else
2595 : : {
2596 [ + - ]: 2 : if (verbose)
2597 [ + - ]: 6 : obatched (clog) << "cannot find section " << section
2598 : : << " for archive " << b_source0
2599 [ + - + - : 2 : << " file " << b_source1 << endl;
+ - + - +
- ]
2600 : 2 : return 0;
2601 : : }
2602 : :
2603 : 4 : struct stat fs;
2604 [ - + ]: 4 : if (fstat (fd, &fs) < 0)
2605 : : {
2606 : 0 : close (fd);
2607 : 0 : throw libc_exception (errno,
2608 [ # # # # : 0 : string ("fstat ") + b_source0 + string (" ") + section);
# # # # #
# # # ]
2609 : : }
2610 : 4 : size = fs.st_size;
2611 : : }
2612 : :
2613 : 998 : struct MHD_Response* r = MHD_create_response_from_fd (size, fd);
2614 [ - + ]: 998 : if (r == 0)
2615 : : {
2616 [ # # ]: 0 : if (verbose)
2617 [ # # # # ]: 0 : obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
2618 : 0 : close(fd);
2619 : : }
2620 : : else
2621 : : {
2622 [ + - + - ]: 1996 : inc_metric ("http_responses_total","result",metric);
2623 : 998 : add_mhd_response_header (r, "Content-Type", "application/octet-stream");
2624 [ + - ]: 998 : add_mhd_response_header (r, "X-DEBUGINFOD-SIZE", to_string(size).c_str());
2625 : 998 : add_mhd_response_header (r, "X-DEBUGINFOD-ARCHIVE", b_source0.c_str());
2626 : 998 : add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source1.c_str());
2627 [ - + ]: 998 : if(!ima_sig.empty()) add_mhd_response_header(r, "X-DEBUGINFOD-IMASIGNATURE", ima_sig.c_str());
2628 : 998 : add_mhd_last_modified (r, mtime);
2629 [ - + ]: 998 : if (verbose > 1)
2630 [ + - ]: 2994 : obatched(clog) << "serving " << metric << " " << b_source0
2631 : : << " file " << b_source1
2632 : : << " section=" << section
2633 [ + - + - : 998 : << " IMA signature=" << ima_sig << endl;
+ - + - +
- + - + -
+ - + - ]
2634 : : /* libmicrohttpd will close fd. */
2635 : : }
2636 : : return r;
2637 : : }
2638 : :
2639 : : static struct MHD_Response*
2640 : 1058 : handle_buildid_r_match (bool internal_req_p,
2641 : : int64_t b_mtime,
2642 : : const string& b_source0,
2643 : : const string& b_source1,
2644 : : int64_t b_id0,
2645 : : int64_t b_id1,
2646 : : const string& section,
2647 : : int *result_fd)
2648 : : {
2649 : 1058 : struct timespec extract_begin;
2650 : 1058 : clock_gettime (CLOCK_MONOTONIC, &extract_begin);
2651 : :
2652 : 1058 : struct stat fs;
2653 : 1058 : int rc = stat (b_source0.c_str(), &fs);
2654 [ + + ]: 1058 : if (rc != 0)
2655 [ + - + - : 116 : throw libc_exception (errno, string("stat ") + b_source0);
+ - ]
2656 : :
2657 [ - + ]: 1000 : if ((int64_t) fs.st_mtime != b_mtime)
2658 : : {
2659 [ # # ]: 0 : if (verbose)
2660 [ # # # # ]: 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
2661 : 0 : return 0;
2662 : : }
2663 : :
2664 : : // Extract the IMA per-file signature (if it exists)
2665 : 1000 : string ima_sig = "";
2666 : : #ifdef ENABLE_IMA_VERIFICATION
2667 : : do
2668 : : {
2669 : : FD_t rpm_fd;
2670 : : if(!(rpm_fd = Fopen(b_source0.c_str(), "r.ufdio"))) // read, uncompressed, rpm/rpmio.h
2671 : : {
2672 : : if (verbose) obatched(clog) << "There was an error while opening " << b_source0 << endl;
2673 : : break; // Exit IMA extraction
2674 : : }
2675 : :
2676 : : Header rpm_hdr;
2677 : : if(RPMRC_FAIL == rpmReadPackageFile(NULL, rpm_fd, b_source0.c_str(), &rpm_hdr))
2678 : : {
2679 : : if (verbose) obatched(clog) << "There was an error while reading the header of " << b_source0 << endl;
2680 : : Fclose(rpm_fd);
2681 : : break; // Exit IMA extraction
2682 : : }
2683 : :
2684 : : // Fill sig_tag_data with an alloc'd copy of the array of IMA signatures (if they exist)
2685 : : struct rpmtd_s sig_tag_data;
2686 : : rpmtdReset(&sig_tag_data);
2687 : : do{ /* A do-while so we can break out of the koji sigcache checking on failure */
2688 : : if(requires_koji_sigcache_mapping)
2689 : : {
2690 : : /* NB: Koji builds result in a directory structure like the following
2691 : : - PACKAGE/VERSION/RELEASE
2692 : : - ARCH1
2693 : : - foo.rpm // The rpm known by debuginfod
2694 : : - ...
2695 : : - ARCHN
2696 : : - data
2697 : : - signed // Periodically purged (and not scanned by debuginfod)
2698 : : - sigcache
2699 : : - ARCH1
2700 : : - foo.rpm.sig // An empty rpm header
2701 : : - ...
2702 : : - ARCHN
2703 : : - PACKAGE_KEYID1
2704 : : - ARCH1
2705 : : - foo.rpm.sig // The header of the signed rpm. This is the file we need to extract the IMA signatures
2706 : : - ...
2707 : : - ARCHN
2708 : : - ...
2709 : : - PACKAGE_KEYIDn
2710 : :
2711 : : We therefore need to do a mapping:
2712 : :
2713 : : P/V/R/A/N-V-R.A.rpm ->
2714 : : P/V/R/data/sigcache/KEYID/A/N-V-R.A.rpm.sig
2715 : :
2716 : : There are 2 key insights here
2717 : :
2718 : : 1. We need to go 2 directories down from sigcache to get to the
2719 : : rpm header. So to distinguish ARCH1/foo.rpm.sig and
2720 : : PACKAGE_KEYID1/ARCH1/foo.rpm.sig we can look 2 directories down
2721 : :
2722 : : 2. It's safe to assume that the user will have all of the
2723 : : required verification certs. So we can pick from any of the
2724 : : PACKAGE_KEYID* directories. For simplicity we choose first we
2725 : : match against
2726 : :
2727 : : See: https://pagure.io/koji/issue/3670
2728 : : */
2729 : :
2730 : : // Do the mapping from b_source0 to the koji path for the signed rpm header
2731 : : string signed_rpm_path = b_source0;
2732 : : size_t insert_pos = string::npos;
2733 : : for(int i = 0; i < 2; i++) insert_pos = signed_rpm_path.rfind("/", insert_pos) - 1;
2734 : : string globbed_path = signed_rpm_path.insert(insert_pos + 1, "/data/sigcache/*").append(".sig"); // The globbed path we're seeking
2735 : : glob_t pglob;
2736 : : int grc;
2737 : : if(0 != (grc = glob(globbed_path.c_str(), GLOB_NOSORT, NULL, &pglob)))
2738 : : {
2739 : : // Break out, but only report real errors
2740 : : if (verbose && grc != GLOB_NOMATCH) obatched(clog) << "There was an error (" << strerror(errno) << ") globbing " << globbed_path << endl;
2741 : : break; // Exit koji sigcache check
2742 : : }
2743 : : signed_rpm_path = pglob.gl_pathv[0]; // See insight 2 above
2744 : : globfree(&pglob);
2745 : :
2746 : : if (verbose > 2) obatched(clog) << "attempting IMA signature extraction from koji header " << signed_rpm_path << endl;
2747 : :
2748 : : FD_t sig_rpm_fd;
2749 : : if(NULL == (sig_rpm_fd = Fopen(signed_rpm_path.c_str(), "r")))
2750 : : {
2751 : : if (verbose) obatched(clog) << "There was an error while opening " << signed_rpm_path << endl;
2752 : : break; // Exit koji sigcache check
2753 : : }
2754 : :
2755 : : Header sig_hdr = headerRead(sig_rpm_fd, HEADER_MAGIC_YES /* Validate magic too */ );
2756 : : if (!sig_hdr || 1 != headerGet(sig_hdr, RPMSIGTAG_FILESIGNATURES, &sig_tag_data, HEADERGET_ALLOC))
2757 : : {
2758 : : if (verbose) obatched(clog) << "Unable to extract RPMSIGTAG_FILESIGNATURES from " << signed_rpm_path << endl;
2759 : : }
2760 : : headerFree(sig_hdr); // We can free here since sig_tag_data has an alloc'd copy of the data
2761 : : Fclose(sig_rpm_fd);
2762 : : }
2763 : : }while(false);
2764 : :
2765 : : if(0 == sig_tag_data.count)
2766 : : {
2767 : : // In the general case (or a fallback from the koji sigcache mapping not finding signatures)
2768 : : // we can just (try) extract the signatures from the rpm header
2769 : : if (1 != headerGet(rpm_hdr, RPMTAG_FILESIGNATURES, &sig_tag_data, HEADERGET_ALLOC))
2770 : : {
2771 : : if (verbose) obatched(clog) << "Unable to extract RPMTAG_FILESIGNATURES from " << b_source0 << endl;
2772 : : }
2773 : : }
2774 : : // Search the array for the signature coresponding to b_source1
2775 : : int idx = -1;
2776 : : char *sig = NULL;
2777 : : rpmfi hdr_fi = rpmfiNew(NULL, rpm_hdr, RPMTAG_BASENAMES, RPMFI_FLAGS_QUERY);
2778 : : do
2779 : : {
2780 : : sig = (char*)rpmtdNextString(&sig_tag_data);
2781 : : idx = rpmfiNext(hdr_fi);
2782 : : }
2783 : : while (idx != -1 && 0 != strcmp(b_source1.c_str(), rpmfiFN(hdr_fi)));
2784 : : rpmfiFree(hdr_fi);
2785 : :
2786 : : if(sig && 0 != strlen(sig) && idx != -1)
2787 : : {
2788 : : if (verbose > 2) obatched(clog) << "Found IMA signature for " << b_source1 << ":\n" << sig << endl;
2789 : : ima_sig = sig;
2790 : : inc_metric("http_responses_total","extra","ima-sigs-extracted");
2791 : : }
2792 : : else
2793 : : {
2794 : : if (verbose > 2) obatched(clog) << "Could not find IMA signature for " << b_source1 << endl;
2795 : : }
2796 : :
2797 : : rpmtdFreeData (&sig_tag_data);
2798 : : headerFree(rpm_hdr);
2799 : : Fclose(rpm_fd);
2800 : : } while(false);
2801 : : #endif
2802 : :
2803 : : // check for a match in the fdcache first
2804 [ + - ]: 1000 : int fd = fdcache.lookup(b_source0, b_source1);
2805 [ + + ]: 1000 : while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
2806 : : {
2807 : 100 : rc = fstat(fd, &fs);
2808 [ - + ]: 100 : if (rc < 0) // disappeared?
2809 : : {
2810 [ # # ]: 0 : if (verbose)
2811 [ # # # # : 0 : obatched(clog) << "cannot fstat fdcache " << b_source0 << endl;
# # ]
2812 [ # # ]: 0 : close(fd);
2813 [ # # ]: 0 : fdcache.clear(b_source0, b_source1);
2814 : : break; // branch out of if "loop", to try new libarchive fetch attempt
2815 : : }
2816 : :
2817 [ + - + - ]: 100 : struct MHD_Response* r = create_buildid_r_response (b_mtime, b_source0,
2818 : : b_source1, section,
2819 : : ima_sig, NULL, fd,
2820 : : fs.st_size,
2821 : : fs.st_mtime,
2822 : : "archive fdcache",
2823 : : extract_begin);
2824 [ + - ]: 100 : if (r == 0)
2825 : : break; // branch out of if "loop", to try new libarchive fetch attempt
2826 [ + - ]: 100 : if (result_fd)
2827 : 100 : *result_fd = fd;
2828 : : return r;
2829 : : // NB: see, we never go around the 'loop' more than once
2830 : : }
2831 : :
2832 : : // no match ... look for a seekable entry
2833 : 900 : bool populate_seekable = ! passive_p;
2834 : 900 : unique_ptr<sqlite_ps> pp (new sqlite_ps (internal_req_p ? db : dbq,
2835 : : "rpm-seekable-query",
2836 : : "select type, size, offset, mtime from " BUILDIDS "_r_seekable "
2837 [ + - + - : 900 : "where file = ? and content = ?"));
+ - + + +
- ]
2838 [ + - + - : 900 : rc = pp->reset().bind(1, b_id0).bind(2, b_id1).step();
+ - + - ]
2839 [ + + ]: 900 : if (rc != SQLITE_DONE)
2840 : : {
2841 [ - + ]: 202 : if (rc != SQLITE_ROW)
2842 [ # # # # ]: 0 : throw sqlite_exception(rc, "step");
2843 : : // if we found a match in _r_seekable but we fail to extract it, don't
2844 : : // bother populating it again
2845 : 202 : populate_seekable = false;
2846 [ + - ]: 202 : const char* seekable_type = (const char*) sqlite3_column_text (*pp, 0);
2847 [ + - - + ]: 202 : if (seekable_type != NULL && strcmp (seekable_type, "xz") == 0)
2848 : : {
2849 [ + - ]: 202 : int64_t seekable_size = sqlite3_column_int64 (*pp, 1);
2850 [ + - ]: 202 : int64_t seekable_offset = sqlite3_column_int64 (*pp, 2);
2851 [ + - ]: 202 : int64_t seekable_mtime = sqlite3_column_int64 (*pp, 3);
2852 : :
2853 : 202 : char* tmppath = NULL;
2854 [ - + ]: 202 : if (asprintf (&tmppath, "%s/debuginfod-fdcache.XXXXXX", tmpdir.c_str()) < 0)
2855 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
2856 : 202 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
2857 : :
2858 [ + - ]: 202 : fd = extract_from_seekable_archive (b_source0, tmppath,
2859 : : seekable_offset, seekable_size);
2860 [ + - ]: 202 : if (fd >= 0)
2861 : : {
2862 : : // Set the mtime so the fdcache file mtimes propagate to future webapi
2863 : : // clients.
2864 : 202 : struct timespec tvs[2];
2865 : 202 : tvs[0].tv_sec = 0;
2866 : 202 : tvs[0].tv_nsec = UTIME_OMIT;
2867 : 202 : tvs[1].tv_sec = seekable_mtime;
2868 : 202 : tvs[1].tv_nsec = 0;
2869 : 202 : (void) futimens (fd, tvs); /* best effort */
2870 [ + - + - ]: 202 : struct MHD_Response* r = create_buildid_r_response (b_mtime,
2871 : : b_source0,
2872 : : b_source1,
2873 : : section,
2874 : : ima_sig,
2875 : : tmppath, fd,
2876 : : seekable_size,
2877 : : seekable_mtime,
2878 : : "seekable xz archive",
2879 : : extract_begin);
2880 [ + - ]: 202 : if (r != 0 && result_fd)
2881 : 202 : *result_fd = fd;
2882 : 202 : return r;
2883 : : }
2884 : 202 : }
2885 : : }
2886 : 698 : pp.reset();
2887 : :
2888 : : // still no match ... grumble, must process the archive
2889 [ + - ]: 698 : string archive_decoder = "/dev/null";
2890 [ + - ]: 698 : string archive_extension = "";
2891 [ + + ]: 1942 : for (auto&& arch : scan_archives)
2892 [ + + ]: 1244 : if (string_endswith(b_source0, arch.first))
2893 : : {
2894 [ + - ]: 698 : archive_extension = arch.first;
2895 [ + - ]: 1942 : archive_decoder = arch.second;
2896 : : }
2897 : 698 : FILE* fp;
2898 : :
2899 : 698 : defer_dtor<FILE*,int>::dtor_fn dfn;
2900 [ + + ]: 698 : if (archive_decoder != "cat")
2901 : : {
2902 [ + - + - : 1056 : string popen_cmd = archive_decoder + " " + shell_escape(b_source0);
+ - ]
2903 [ + - ]: 528 : fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
2904 : 528 : dfn = pclose;
2905 [ - + ]: 528 : if (fp == NULL)
2906 [ # # # # : 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
# # ]
2907 : 528 : }
2908 : : else
2909 : : {
2910 [ + - ]: 170 : fp = fopen (b_source0.c_str(), "r");
2911 : 170 : dfn = fclose;
2912 [ - + ]: 170 : if (fp == NULL)
2913 [ # # # # : 0 : throw libc_exception (errno, string("fopen ") + b_source0);
# # ]
2914 : : }
2915 : 698 : defer_dtor<FILE*,int> fp_closer (fp, dfn);
2916 : :
2917 : 698 : struct archive *a;
2918 [ + - ]: 698 : a = archive_read_new();
2919 [ - + ]: 698 : if (a == NULL)
2920 [ # # # # ]: 0 : throw archive_exception("cannot create archive reader");
2921 : 698 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
2922 : :
2923 [ + - ]: 698 : rc = archive_read_support_format_all(a);
2924 [ - + ]: 698 : if (rc != ARCHIVE_OK)
2925 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all format");
2926 [ + - ]: 698 : rc = archive_read_support_filter_all(a);
2927 [ - + ]: 698 : if (rc != ARCHIVE_OK)
2928 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all filters");
2929 : :
2930 [ + - ]: 698 : rc = archive_read_open_FILE (a, fp);
2931 [ - + ]: 698 : if (rc != ARCHIVE_OK)
2932 : : {
2933 [ # # # # : 0 : obatched(clog) << "cannot open archive from pipe " << b_source0 << endl;
# # ]
2934 [ # # # # ]: 0 : throw archive_exception(a, "cannot open archive from pipe");
2935 : : }
2936 : :
2937 : : // If the archive was scanned in a version without _r_seekable, then we may
2938 : : // need to populate _r_seekable now. This can be removed the next time
2939 : : // BUILDIDS is updated.
2940 [ + + ]: 698 : if (populate_seekable)
2941 : : {
2942 [ + - ]: 696 : populate_seekable = is_seekable_archive (b_source0, a);
2943 [ + - ]: 696 : if (populate_seekable)
2944 : : {
2945 : : // NB: the names are already interned
2946 : 0 : pp.reset(new sqlite_ps (db, "rpm-seekable-insert2",
2947 : : "insert or ignore into " BUILDIDS "_r_seekable (file, content, type, size, offset, mtime) "
2948 : : "values (?, "
2949 : : "(select id from " BUILDIDS "_files "
2950 : : "where dirname = (select id from " BUILDIDS "_fileparts where name = ?) "
2951 : : "and basename = (select id from " BUILDIDS "_fileparts where name = ?) "
2952 [ # # # # : 0 : "), 'xz', ?, ?, ?)"));
# # # # ]
2953 : : }
2954 : : }
2955 : :
2956 : : // archive traversal is in five stages:
2957 : : // 1) before we find a matching entry, insert it into _r_seekable if needed or
2958 : : // skip it otherwise
2959 : : // 2) extract the matching entry (set r = result). Also insert it into
2960 : : // _r_seekable if needed
2961 : : // 3) extract some number of prefetched entries (just into fdcache). Also
2962 : : // insert them into _r_seekable if needed
2963 : : // 4) if needed, insert all of the remaining entries into _r_seekable
2964 : : // 5) abort any further processing
2965 : 698 : struct MHD_Response* r = 0; // will set in stage 2
2966 [ + + ]: 698 : unsigned prefetch_count =
2967 : : internal_req_p ? 0 : fdcache_prefetch; // will decrement in stage 3
2968 : :
2969 [ + + - + ]: 10822 : while(r == 0 || prefetch_count > 0 || populate_seekable) // stage 1-4
2970 : : {
2971 [ + - ]: 10782 : if (interrupted)
2972 : : break;
2973 : :
2974 : 10782 : struct archive_entry *e;
2975 [ + - ]: 10782 : rc = archive_read_next_header (a, &e);
2976 [ + + ]: 10782 : if (rc != ARCHIVE_OK)
2977 : : break;
2978 : :
2979 [ + - + + ]: 10126 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
2980 : 9428 : continue;
2981 : :
2982 [ + - ]: 2926 : string fn = canonicalized_archive_entry_pathname (e);
2983 : :
2984 [ - + ]: 2926 : if (populate_seekable)
2985 : : {
2986 : 0 : string dn, bn;
2987 : 0 : size_t slash = fn.rfind('/');
2988 [ # # ]: 0 : if (slash == std::string::npos) {
2989 [ # # ]: 0 : dn = "";
2990 [ # # ]: 0 : bn = fn;
2991 : : } else {
2992 [ # # ]: 0 : dn = fn.substr(0, slash);
2993 [ # # ]: 0 : bn = fn.substr(slash + 1);
2994 : : }
2995 : :
2996 [ # # ]: 0 : int64_t seekable_size = archive_entry_size (e);
2997 [ # # ]: 0 : int64_t seekable_offset = archive_filter_bytes (a, 0);
2998 [ # # ]: 0 : time_t seekable_mtime = archive_entry_mtime (e);
2999 : :
3000 [ # # ]: 0 : pp->reset();
3001 [ # # ]: 0 : pp->bind(1, b_id0);
3002 [ # # ]: 0 : pp->bind(2, dn);
3003 [ # # ]: 0 : pp->bind(3, bn);
3004 [ # # ]: 0 : pp->bind(4, seekable_size);
3005 [ # # ]: 0 : pp->bind(5, seekable_offset);
3006 [ # # ]: 0 : pp->bind(6, seekable_mtime);
3007 [ # # ]: 0 : rc = pp->step();
3008 [ # # ]: 0 : if (rc != SQLITE_DONE)
3009 [ # # # # ]: 0 : obatched(clog) << "recording seekable file=" << fn
3010 [ # # # # : 0 : << " sqlite3 error: " << (sqlite3_errstr(rc) ?: "?") << endl;
# # # # #
# ]
3011 [ # # ]: 0 : else if (verbose > 2)
3012 [ # # # # ]: 0 : obatched(clog) << "recorded seekable file=" << fn
3013 [ # # # # ]: 0 : << " size=" << seekable_size
3014 [ # # # # ]: 0 : << " offset=" << seekable_offset
3015 [ # # # # : 0 : << " mtime=" << seekable_mtime << endl;
# # ]
3016 [ # # ]: 0 : if (r != 0 && prefetch_count == 0) // stage 4
3017 : 0 : continue;
3018 : 0 : }
3019 : :
3020 [ + + + + ]: 2926 : if ((r == 0) && (fn != b_source1)) // stage 1
3021 : 1632 : continue;
3022 : :
3023 [ + - - + ]: 1294 : if (fdcache.probe (b_source0, fn) && // skip if already interned
3024 [ # # ]: 0 : fn != b_source1) // but only if we'd just be prefetching, PR29474
3025 : 0 : continue;
3026 : :
3027 : : // extract this file to a temporary file
3028 : 1294 : char* tmppath = NULL;
3029 : 1294 : rc = asprintf (&tmppath, "%s/debuginfod-fdcache.XXXXXX", tmpdir.c_str());
3030 [ - + ]: 1294 : if (rc < 0)
3031 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
3032 : 1294 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
3033 [ + - ]: 1294 : fd = mkstemp (tmppath);
3034 [ - + ]: 1294 : if (fd < 0)
3035 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
3036 : : // NB: don't unlink (tmppath), as fdcache will take charge of it.
3037 : :
3038 : : // NB: this can take many uninterruptible seconds for a huge file
3039 [ + - ]: 1294 : rc = archive_read_data_into_fd (a, fd);
3040 [ - + ]: 1294 : if (rc != ARCHIVE_OK) // e.g. ENOSPC!
3041 : : {
3042 [ # # ]: 0 : close (fd);
3043 : 0 : unlink (tmppath);
3044 [ # # # # ]: 0 : throw archive_exception(a, b_source0, "cannot extract file");
3045 : : }
3046 : :
3047 : : // Set the mtime so the fdcache file mtimes, even prefetched ones,
3048 : : // propagate to future webapi clients.
3049 : 1294 : struct timespec tvs[2];
3050 : 1294 : tvs[0].tv_sec = 0;
3051 : 1294 : tvs[0].tv_nsec = UTIME_OMIT;
3052 [ + - ]: 1294 : tvs[1].tv_sec = archive_entry_mtime(e);
3053 [ + - ]: 1294 : tvs[1].tv_nsec = archive_entry_mtime_nsec(e);
3054 : 1294 : (void) futimens (fd, tvs); /* best effort */
3055 : :
3056 [ + + ]: 1294 : if (r != 0) // stage 3
3057 : : {
3058 : 596 : struct timespec extract_end;
3059 : 596 : clock_gettime (CLOCK_MONOTONIC, &extract_end);
3060 : 596 : double extract_time = (extract_end.tv_sec - extract_begin.tv_sec)
3061 : 596 : + (extract_end.tv_nsec - extract_begin.tv_nsec)/1.e9;
3062 : : // NB: now we know we have a complete reusable file; make fdcache
3063 : : // responsible for unlinking it later.
3064 [ + - + - : 596 : fdcache.intern(b_source0, fn,
+ - ]
3065 : : tmppath, archive_entry_size(e),
3066 : : false, extract_time); // prefetched ones go to the prefetch cache
3067 : 596 : prefetch_count --;
3068 [ + - ]: 596 : close (fd); // we're not saving this fd to make a mhd-response from!
3069 : 596 : continue;
3070 : 596 : }
3071 : :
3072 [ + - + - : 698 : r = create_buildid_r_response (b_mtime, b_source0, b_source1, section,
+ - ]
3073 : : ima_sig, tmppath, fd,
3074 : : archive_entry_size(e),
3075 : : archive_entry_mtime(e),
3076 [ + - ]: 698 : archive_extension + " archive",
3077 : : extract_begin);
3078 [ + + ]: 698 : if (r == 0)
3079 : : break; // assume no chance of better luck around another iteration; no other copies of same file
3080 [ + - ]: 696 : if (result_fd)
3081 : 696 : *result_fd = fd;
3082 : 3522 : }
3083 : :
3084 : : // XXX: rpm/file not found: delete this R entry?
3085 : 698 : return r;
3086 : 1900 : }
3087 : :
3088 : : void
3089 : 660 : add_client_federation_headers(debuginfod_client *client, MHD_Connection* conn){
3090 : : // Transcribe incoming User-Agent:
3091 [ - + ]: 660 : string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
3092 [ + - + - ]: 664 : string ua_complete = string("User-Agent: ") + ua;
3093 [ + - ]: 660 : debuginfod_add_http_header (client, ua_complete.c_str());
3094 : :
3095 : : // Compute larger XFF:, for avoiding info loss during
3096 : : // federation, and for future cyclicity detection.
3097 [ + - + + : 1294 : string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
+ - ]
3098 [ + + ]: 660 : if (xff != "")
3099 [ + - ]: 60 : xff += string(", "); // comma separated list
3100 : :
3101 : 660 : unsigned int xff_count = 0;
3102 [ + + ]: 1032 : for (auto&& i : xff){
3103 [ + + ]: 372 : if (i == ',') xff_count++;
3104 : : }
3105 : :
3106 : : // if X-Forwarded-For: exceeds N hops,
3107 : : // do not delegate a local lookup miss to upstream debuginfods.
3108 [ + + ]: 660 : if (xff_count >= forwarded_ttl_limit)
3109 : 4 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
3110 [ + - ]: 8 : and will not query the upstream servers");
3111 : :
3112 : : // Compute the client's numeric IP address only - so can't merge with conninfo()
3113 [ + - ]: 656 : const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
3114 : : MHD_CONNECTION_INFO_CLIENT_ADDRESS);
3115 [ + - ]: 656 : struct sockaddr *so = u ? u->client_addr : 0;
3116 : 656 : char hostname[256] = ""; // RFC1035
3117 [ + - - + ]: 656 : if (so && so->sa_family == AF_INET) {
3118 [ # # ]: 0 : (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
3119 : : NI_NUMERICHOST);
3120 [ + - ]: 656 : } else if (so && so->sa_family == AF_INET6) {
3121 : 656 : struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
3122 [ + - + - : 656 : if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
- + ]
3123 : 656 : struct sockaddr_in addr4;
3124 [ + - ]: 656 : memset (&addr4, 0, sizeof(addr4));
3125 : 656 : addr4.sin_family = AF_INET;
3126 : 656 : addr4.sin_port = addr6->sin6_port;
3127 [ + - ]: 656 : memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
3128 [ + - ]: 656 : (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
3129 : : hostname, sizeof (hostname), NULL, 0,
3130 : : NI_NUMERICHOST);
3131 : : } else {
3132 [ # # ]: 0 : (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
3133 : : NI_NUMERICHOST);
3134 : : }
3135 : : }
3136 : :
3137 [ + - + - : 1316 : string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
+ - + - ]
3138 [ + - ]: 656 : debuginfod_add_http_header (client, xff_complete.c_str());
3139 : 664 : }
3140 : :
3141 : : static struct MHD_Response*
3142 : 2212 : handle_buildid_match (bool internal_req_p,
3143 : : int64_t b_mtime,
3144 : : const string& b_stype,
3145 : : const string& b_source0,
3146 : : const string& b_source1,
3147 : : int64_t b_id0,
3148 : : int64_t b_id1,
3149 : : const string& section,
3150 : : int *result_fd)
3151 : : {
3152 : 2212 : try
3153 : : {
3154 [ + + ]: 2212 : if (b_stype == "F")
3155 [ + - ]: 1154 : return handle_buildid_f_match(internal_req_p, b_mtime, b_source0,
3156 : : section, result_fd);
3157 [ + - ]: 1058 : else if (b_stype == "R")
3158 [ + + ]: 1058 : return handle_buildid_r_match(internal_req_p, b_mtime, b_source0,
3159 : : b_source1, b_id0, b_id1, section,
3160 : : result_fd);
3161 : : }
3162 [ - + ]: 58 : catch (const reportable_exception &e)
3163 : : {
3164 [ + - ]: 58 : e.report(clog);
3165 : : // Report but swallow libc etc. errors here; let the caller
3166 : : // iterate to other matches of the content.
3167 : 58 : }
3168 : :
3169 : : return 0;
3170 : : }
3171 : :
3172 : :
3173 : : static int
3174 : 4 : debuginfod_find_progress (debuginfod_client *, long a, long b)
3175 : : {
3176 [ - + ]: 4 : if (verbose > 4)
3177 [ # # # # : 0 : obatched(clog) << "federated debuginfod progress=" << a << "/" << b << endl;
# # # # ]
3178 : :
3179 : 4 : return interrupted;
3180 : : }
3181 : :
3182 : :
3183 : : // a little lru pool of debuginfod_client*s for reuse between query threads
3184 : :
3185 : : mutex dc_pool_lock;
3186 : : deque<debuginfod_client*> dc_pool;
3187 : :
3188 : 680 : debuginfod_client* debuginfod_pool_begin()
3189 : : {
3190 : 680 : unique_lock<mutex> lock(dc_pool_lock);
3191 [ + + ]: 680 : if (dc_pool.size() > 0)
3192 : : {
3193 [ + - + - : 1296 : inc_metric("dc_pool_op_count","op","begin-reuse");
+ - + - ]
3194 : 648 : debuginfod_client *c = dc_pool.front();
3195 : 648 : dc_pool.pop_front();
3196 : 648 : return c;
3197 : : }
3198 [ + - + - : 64 : inc_metric("dc_pool_op_count","op","begin-new");
+ - + - ]
3199 [ + - ]: 32 : return debuginfod_begin();
3200 : 680 : }
3201 : :
3202 : :
3203 : 162 : void debuginfod_pool_groom()
3204 : : {
3205 : 162 : unique_lock<mutex> lock(dc_pool_lock);
3206 [ + + ]: 194 : while (dc_pool.size() > 0)
3207 : : {
3208 [ + - + - : 64 : inc_metric("dc_pool_op_count","op","end");
+ - + - ]
3209 [ + - ]: 32 : debuginfod_end(dc_pool.front());
3210 : 32 : dc_pool.pop_front();
3211 : : }
3212 : 162 : }
3213 : :
3214 : :
3215 : 680 : void debuginfod_pool_end(debuginfod_client* c)
3216 : : {
3217 : 680 : unique_lock<mutex> lock(dc_pool_lock);
3218 [ + - + - : 1360 : inc_metric("dc_pool_op_count","op","end-save");
+ - + - ]
3219 [ + - ]: 680 : dc_pool.push_front(c); // accelerate reuse, vs. push_back
3220 : 680 : }
3221 : :
3222 : :
3223 : : static struct MHD_Response*
3224 : 2796 : handle_buildid (MHD_Connection* conn,
3225 : : const string& buildid /* unsafe */,
3226 : : string& artifacttype /* unsafe, cleanse on exception/return */,
3227 : : const string& suffix /* unsafe */,
3228 : : int *result_fd)
3229 : : {
3230 : : // validate artifacttype
3231 [ + + ]: 2796 : string atype_code;
3232 [ + + + - ]: 2796 : if (artifacttype == "debuginfo") atype_code = "D";
3233 [ + + + - ]: 1828 : else if (artifacttype == "executable") atype_code = "E";
3234 [ + + + - ]: 1138 : else if (artifacttype == "source") atype_code = "S";
3235 [ + + + - ]: 12 : else if (artifacttype == "section") atype_code = "I";
3236 : : else {
3237 [ + - ]: 4 : artifacttype = "invalid"; // PR28242 ensure http_resposes metrics don't propagate unclean user data
3238 [ + - ]: 8 : throw reportable_exception("invalid artifacttype");
3239 : : }
3240 : :
3241 [ + + ]: 2792 : if (conn != 0)
3242 [ + - + - : 5826 : inc_metric("http_requests_total", "type", artifacttype);
+ - ]
3243 : :
3244 [ + + ]: 2792 : string section;
3245 [ + + ]: 2792 : if (atype_code == "I")
3246 : : {
3247 [ - + ]: 8 : if (suffix.size () < 2)
3248 [ # # ]: 0 : throw reportable_exception ("invalid section suffix");
3249 : :
3250 : : // Remove leading '/'
3251 [ + - ]: 8 : section = suffix.substr(1);
3252 : : }
3253 : :
3254 [ + + - + ]: 2792 : if (atype_code == "S" && suffix == "")
3255 [ # # ]: 0 : throw reportable_exception("invalid source suffix");
3256 : :
3257 : : // validate buildid
3258 [ + + ]: 2792 : if ((buildid.size() < 2) || // not empty
3259 [ + + + - : 5582 : (buildid.size() % 2) || // even number
+ - ]
3260 : 2790 : (buildid.find_first_not_of("0123456789abcdef") != string::npos)) // pure tasty lowercase hex
3261 [ + - ]: 4 : throw reportable_exception("invalid buildid");
3262 : :
3263 [ + - ]: 2790 : if (verbose > 1)
3264 [ + - + - ]: 8370 : obatched(clog) << "searching for buildid=" << buildid << " artifacttype=" << artifacttype
3265 [ + - + - : 2790 : << " suffix=" << suffix << endl;
+ - + - +
- ]
3266 : :
3267 : : // If invoked from the scanner threads, use the scanners' read-write
3268 : : // connection. Otherwise use the web query threads' read-only connection.
3269 [ + + ]: 2790 : sqlite3 *thisdb = (conn == 0) ? db : dbq;
3270 : :
3271 : 2790 : sqlite_ps *pp = 0;
3272 : :
3273 [ + + ]: 2790 : if (atype_code == "D")
3274 : : {
3275 : 968 : pp = new sqlite_ps (thisdb, "mhd-query-d",
3276 : : "select mtime, sourcetype, source0, source1, id0, id1 from " BUILDIDS "_query_d2 where buildid = ? "
3277 [ + - + - : 1936 : "order by mtime desc");
+ - + - ]
3278 [ + - ]: 968 : pp->reset();
3279 [ + - ]: 968 : pp->bind(1, buildid);
3280 : : }
3281 [ + + ]: 1822 : else if (atype_code == "E")
3282 : : {
3283 : 688 : pp = new sqlite_ps (thisdb, "mhd-query-e",
3284 : : "select mtime, sourcetype, source0, source1, id0, id1 from " BUILDIDS "_query_e2 where buildid = ? "
3285 [ + - + - : 1376 : "order by mtime desc");
+ - + - ]
3286 [ + - ]: 688 : pp->reset();
3287 [ + - ]: 688 : pp->bind(1, buildid);
3288 : : }
3289 [ + + ]: 1134 : else if (atype_code == "S")
3290 : : {
3291 : : // PR25548
3292 : : // Incoming source queries may come in with either dwarf-level OR canonicalized paths.
3293 : : // We let the query pass with either one.
3294 : :
3295 : 1126 : pp = new sqlite_ps (thisdb, "mhd-query-s",
3296 : : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc in (?,?) "
3297 [ + - + - : 2252 : "order by sharedprefix(source0,source0ref) desc, mtime desc");
+ - + - ]
3298 [ + - ]: 1126 : pp->reset();
3299 [ + - ]: 1126 : pp->bind(1, buildid);
3300 : : // NB: we don't store the non-canonicalized path names any more, but old databases
3301 : : // might have them (and no canon ones), so we keep searching for both.
3302 [ + - ]: 1126 : pp->bind(2, suffix);
3303 [ + - + - ]: 2890 : pp->bind(3, canon_pathname(suffix));
3304 : : }
3305 [ + - ]: 8 : else if (atype_code == "I")
3306 : : {
3307 : 8 : pp = new sqlite_ps (thisdb, "mhd-query-i",
3308 : : "select mtime, sourcetype, source0, source1, 1 as debug_p from " BUILDIDS "_query_d2 where buildid = ? "
3309 : : "union all "
3310 : : "select mtime, sourcetype, source0, source1, 0 as debug_p from " BUILDIDS "_query_e2 where buildid = ? "
3311 [ + - + - : 16 : "order by debug_p desc, mtime desc");
+ - + - ]
3312 [ + - ]: 8 : pp->reset();
3313 [ + - ]: 8 : pp->bind(1, buildid);
3314 [ + - ]: 8 : pp->bind(2, buildid);
3315 : : }
3316 : 2790 : unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
3317 : :
3318 : 2790 : bool do_upstream_section_query = true;
3319 : :
3320 : : // consume all the rows
3321 : 2914 : while (1)
3322 : : {
3323 [ + - ]: 2852 : int rc = pp->step();
3324 [ + + ]: 2852 : if (rc == SQLITE_DONE) break;
3325 [ - + ]: 2212 : if (rc != SQLITE_ROW)
3326 [ # # # # ]: 0 : throw sqlite_exception(rc, "step");
3327 : :
3328 [ + - ]: 2212 : int64_t b_mtime = sqlite3_column_int64 (*pp, 0);
3329 [ + - - + : 2212 : string b_stype = string((const char*) sqlite3_column_text (*pp, 1) ?: ""); /* by DDL may not be NULL */
+ - ]
3330 [ + - - + : 2212 : string b_source0 = string((const char*) sqlite3_column_text (*pp, 2) ?: ""); /* may be NULL */
+ - ]
3331 [ + - + + : 3366 : string b_source1 = string((const char*) sqlite3_column_text (*pp, 3) ?: ""); /* may be NULL */
+ - ]
3332 : 2212 : int64_t b_id0 = 0, b_id1 = 0;
3333 [ + + + + ]: 2212 : if (atype_code == "D" || atype_code == "E")
3334 : : {
3335 [ + - ]: 1072 : b_id0 = sqlite3_column_int64 (*pp, 4);
3336 [ + - ]: 1072 : b_id1 = sqlite3_column_int64 (*pp, 5);
3337 : : }
3338 : :
3339 [ + - ]: 2212 : if (verbose > 1)
3340 [ + - + - ]: 6636 : obatched(clog) << "found mtime=" << b_mtime << " stype=" << b_stype
3341 [ + - + - : 2212 : << " source0=" << b_source0 << " source1=" << b_source1 << endl;
+ - + - +
- + - +
- ]
3342 : :
3343 : : // Try accessing the located match.
3344 : : // XXX: in case of multiple matches, attempt them in parallel?
3345 [ + - ]: 2212 : auto r = handle_buildid_match (conn ? false : true,
3346 : : b_mtime, b_stype, b_source0, b_source1,
3347 : : b_id0, b_id1, section, result_fd);
3348 [ + + ]: 2212 : if (r)
3349 : 2150 : return r;
3350 : :
3351 : : // If a debuginfo file matching BUILDID was found but didn't contain
3352 : : // the desired section, then the section should not exist. Don't
3353 : : // bother querying upstream servers.
3354 [ + + + - : 62 : if (!section.empty () && (sqlite3_column_int (*pp, 4) == 1))
- + ]
3355 : : {
3356 : 4 : struct stat st;
3357 : :
3358 : : // For "F" sourcetype, check if the debuginfo exists. For "R"
3359 : : // sourcetype, check if the debuginfo was interned into the fdcache.
3360 [ - + ]: 2 : if ((b_stype == "F" && (stat (b_source0.c_str (), &st) == 0))
3361 [ + + + - : 4 : || (b_stype == "R" && fdcache.probe (b_source0, b_source1)))
+ - + - ]
3362 : : do_upstream_section_query = false;
3363 : : }
3364 : 2212 : }
3365 [ + - ]: 640 : pp->reset();
3366 : :
3367 [ - + ]: 640 : if (!do_upstream_section_query)
3368 [ # # ]: 0 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
3369 : :
3370 : : // We couldn't find it in the database. Last ditch effort
3371 : : // is to defer to other debuginfo servers.
3372 : :
3373 : 640 : int fd = -1;
3374 [ + - ]: 640 : debuginfod_client *client = debuginfod_pool_begin ();
3375 [ - + ]: 640 : if (client == NULL)
3376 [ # # # # ]: 0 : throw libc_exception(errno, "debuginfod client pool alloc");
3377 : 640 : defer_dtor<debuginfod_client*,void> client_closer (client, debuginfod_pool_end);
3378 : :
3379 [ + - ]: 640 : debuginfod_set_progressfn (client, & debuginfod_find_progress);
3380 : :
3381 [ + + ]: 640 : if (conn)
3382 [ + + ]: 620 : add_client_federation_headers(client, conn);
3383 : :
3384 [ + + ]: 636 : if (artifacttype == "debuginfo")
3385 [ + - ]: 88 : fd = debuginfod_find_debuginfo (client,
3386 [ + - ]: 88 : (const unsigned char*) buildid.c_str(),
3387 : : 0, NULL);
3388 [ + + ]: 548 : else if (artifacttype == "executable")
3389 [ + - ]: 546 : fd = debuginfod_find_executable (client,
3390 [ + - ]: 546 : (const unsigned char*) buildid.c_str(),
3391 : : 0, NULL);
3392 [ + - ]: 2 : else if (artifacttype == "source")
3393 [ + - ]: 2 : fd = debuginfod_find_source (client,
3394 [ + - ]: 2 : (const unsigned char*) buildid.c_str(),
3395 : : 0, suffix.c_str(), NULL);
3396 [ # # ]: 0 : else if (artifacttype == "section")
3397 [ # # ]: 0 : fd = debuginfod_find_section (client,
3398 [ # # ]: 0 : (const unsigned char*) buildid.c_str(),
3399 : : 0, section.c_str(), NULL);
3400 : :
3401 [ + + ]: 636 : if (fd >= 0)
3402 : : {
3403 [ + - ]: 4 : if (conn != 0)
3404 [ + - + - : 644 : inc_metric ("http_responses_total","result","upstream");
+ - + - ]
3405 : 4 : struct stat s;
3406 : 4 : int rc = fstat (fd, &s);
3407 [ + - ]: 4 : if (rc == 0)
3408 : : {
3409 [ + - ]: 4 : auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
3410 [ + - ]: 4 : if (r)
3411 : : {
3412 [ + - ]: 4 : add_mhd_response_header (r, "Content-Type",
3413 : : "application/octet-stream");
3414 : : // Copy the incoming headers
3415 [ + - ]: 4 : const char * hdrs = debuginfod_get_headers(client);
3416 [ + - ]: 4 : string header_dup;
3417 [ + - ]: 4 : if (hdrs)
3418 [ + - ]: 4 : header_dup = string(hdrs);
3419 : : // Parse the "header: value\n" lines into (h,v) tuples and pass on
3420 : 20 : while(1)
3421 : : {
3422 : 12 : size_t newline = header_dup.find('\n');
3423 [ + + ]: 12 : if (newline == string::npos) break;
3424 : 8 : size_t colon = header_dup.find(':');
3425 [ + - ]: 8 : if (colon == string::npos) break;
3426 [ + - ]: 8 : string header = header_dup.substr(0,colon);
3427 [ + - ]: 8 : string value = header_dup.substr(colon+1,newline-colon-1);
3428 : : // strip leading spaces from value
3429 : 8 : size_t nonspace = value.find_first_not_of(" ");
3430 [ + - ]: 8 : if (nonspace != string::npos)
3431 [ + - ]: 8 : value = value.substr(nonspace);
3432 [ + - ]: 8 : add_mhd_response_header(r, header.c_str(), value.c_str());
3433 [ + - ]: 8 : header_dup = header_dup.substr(newline+1);
3434 : 8 : }
3435 : :
3436 [ + - ]: 4 : add_mhd_last_modified (r, s.st_mtime);
3437 [ + - ]: 4 : if (verbose > 1)
3438 [ + - + - ]: 8 : obatched(clog) << "serving file from upstream debuginfod/cache" << endl;
3439 [ + - ]: 4 : if (result_fd)
3440 : 4 : *result_fd = fd;
3441 : 4 : return r; // NB: don't close fd; libmicrohttpd will
3442 : 0 : }
3443 : : }
3444 [ # # ]: 0 : close (fd);
3445 : : }
3446 : : else
3447 [ + + ]: 632 : switch(fd)
3448 : : {
3449 : : case -ENOSYS:
3450 : : break;
3451 : : case -ENOENT:
3452 : : break;
3453 : 532 : default: // some more tricky error
3454 [ + - + - ]: 1064 : throw libc_exception(-fd, "upstream debuginfod query failed");
3455 : : }
3456 : :
3457 [ + - ]: 200 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
3458 : 3428 : }
3459 : :
3460 : :
3461 : : ////////////////////////////////////////////////////////////////////////
3462 : :
3463 : : static map<string,double> metrics; // arbitrary data for /metrics query
3464 : : // NB: store int64_t since all our metrics are integers; prometheus accepts double
3465 : : static mutex metrics_lock;
3466 : : // NB: these objects get released during the process exit via global dtors
3467 : : // do not call them from within other global dtors
3468 : :
3469 : : // utility function for assembling prometheus-compatible
3470 : : // name="escaped-value" strings
3471 : : // https://prometheus.io/docs/instrumenting/exposition_formats/
3472 : : static string
3473 : 857477 : metric_label(const string& name, const string& value)
3474 : : {
3475 : 857477 : string x = name + "=\"";
3476 [ + + ]: 14750537 : for (auto&& c : value)
3477 [ - - - + ]: 13894340 : switch(c)
3478 : : {
3479 [ # # ]: 0 : case '\\': x += "\\\\"; break;
3480 [ # # ]: 0 : case '\"': x += "\\\""; break;
3481 [ # # ]: 0 : case '\n': x += "\\n"; break;
3482 [ + - ]: 27787835 : default: x += c; break;
3483 : : }
3484 [ + - ]: 856197 : x += "\"";
3485 : 856627 : return x;
3486 : 0 : }
3487 : :
3488 : :
3489 : : // add prometheus-format metric name + label tuple (if any) + value
3490 : :
3491 : : static void
3492 : 12192 : set_metric(const string& metric, double value)
3493 : : {
3494 : 12192 : unique_lock<mutex> lock(metrics_lock);
3495 [ + - ]: 12192 : metrics[metric] = value;
3496 : 12192 : }
3497 : : static void
3498 : 102 : inc_metric(const string& metric)
3499 : : {
3500 : 102 : unique_lock<mutex> lock(metrics_lock);
3501 [ + - ]: 102 : metrics[metric] ++;
3502 : 102 : }
3503 : : static void
3504 : 8307 : set_metric(const string& metric,
3505 : : const string& lname, const string& lvalue,
3506 : : double value)
3507 : : {
3508 [ + - + - ]: 16614 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
3509 [ + - ]: 8308 : unique_lock<mutex> lock(metrics_lock);
3510 [ + - ]: 8310 : metrics[key] = value;
3511 : 8310 : }
3512 : :
3513 : : static void
3514 : 415197 : inc_metric(const string& metric,
3515 : : const string& lname, const string& lvalue)
3516 : : {
3517 [ + - + - ]: 830395 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
3518 [ + - ]: 415203 : unique_lock<mutex> lock(metrics_lock);
3519 [ + - ]: 415257 : metrics[key] ++;
3520 : 415196 : }
3521 : : static void
3522 : 394876 : add_metric(const string& metric,
3523 : : const string& lname, const string& lvalue,
3524 : : double value)
3525 : : {
3526 [ + - + - ]: 790005 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
3527 [ + - ]: 395188 : unique_lock<mutex> lock(metrics_lock);
3528 [ + - ]: 395468 : metrics[key] += value;
3529 : 395458 : }
3530 : : static void
3531 : 102 : add_metric(const string& metric,
3532 : : double value)
3533 : : {
3534 : 102 : unique_lock<mutex> lock(metrics_lock);
3535 [ + - ]: 102 : metrics[metric] += value;
3536 : 102 : }
3537 : :
3538 : :
3539 : : // and more for higher arity labels if needed
3540 : :
3541 : : static void
3542 : 10179 : inc_metric(const string& metric,
3543 : : const string& lname, const string& lvalue,
3544 : : const string& rname, const string& rvalue)
3545 : : {
3546 [ + - ]: 20358 : string key = (metric + "{"
3547 [ + - + - ]: 40716 : + metric_label(lname, lvalue) + ","
3548 [ + - ]: 30537 : + metric_label(rname, rvalue) + "}");
3549 [ + - ]: 10179 : unique_lock<mutex> lock(metrics_lock);
3550 [ + - ]: 10179 : metrics[key] ++;
3551 : 10179 : }
3552 : : static void
3553 : 10179 : add_metric(const string& metric,
3554 : : const string& lname, const string& lvalue,
3555 : : const string& rname, const string& rvalue,
3556 : : double value)
3557 : : {
3558 [ + - ]: 20358 : string key = (metric + "{"
3559 [ + - + - ]: 40716 : + metric_label(lname, lvalue) + ","
3560 [ + - ]: 30537 : + metric_label(rname, rvalue) + "}");
3561 [ + - ]: 10179 : unique_lock<mutex> lock(metrics_lock);
3562 [ + - ]: 10179 : metrics[key] += value;
3563 : 10179 : }
3564 : :
3565 : : static struct MHD_Response*
3566 : 747 : handle_metrics (off_t* size)
3567 : : {
3568 : 747 : stringstream o;
3569 : 747 : {
3570 [ + - ]: 747 : unique_lock<mutex> lock(metrics_lock);
3571 [ + + ]: 79642 : for (auto&& i : metrics)
3572 [ + - ]: 78895 : o << i.first
3573 : : << " "
3574 [ + - + - ]: 78895 : << std::setprecision(std::numeric_limits<double>::digits10 + 1)
3575 [ + - + - ]: 78895 : << i.second
3576 : 78895 : << endl;
3577 : 0 : }
3578 [ + - ]: 747 : const string& os = o.str();
3579 [ + - ]: 747 : MHD_Response* r = MHD_create_response_from_buffer (os.size(),
3580 [ + - ]: 747 : (void*) os.c_str(),
3581 : : MHD_RESPMEM_MUST_COPY);
3582 [ + - ]: 747 : if (r != NULL)
3583 : : {
3584 [ + - ]: 747 : *size = os.size();
3585 [ + - ]: 747 : add_mhd_response_header (r, "Content-Type", "text/plain");
3586 : : }
3587 : 1494 : return r;
3588 : 747 : }
3589 : :
3590 : : static sqlite_ps*
3591 : 30 : handle_metadata_glob(sqlite3* thisdb, const string& key, const string& value)
3592 : : {
3593 : : // Query locally for matching e, d files
3594 [ + + ]: 30 : string op;
3595 [ + + ]: 30 : if (key == "glob")
3596 [ + - ]: 26 : op = "glob";
3597 [ + - ]: 4 : else if (key == "file")
3598 [ + - ]: 4 : op = "=";
3599 : : else
3600 [ # # ]: 0 : throw reportable_exception("/metadata webapi error, unsupported key");
3601 : :
3602 : : // Since PR30378, the file names are segmented into two tables. We
3603 : : // could do a glob/= search over the _files_v view that combines
3604 : : // them, but that means that the entire _files_v thing has to be
3605 : : // materialized & scanned to do the query. Slow! Instead, we can
3606 : : // segment the incoming file/glob pattern into dirname / basename
3607 : : // parts, and apply them to the corresponding table. This is done
3608 : : // by splitting the value at the last "/". If absent, the same
3609 : : // convention as is used in register_file_name().
3610 : :
3611 : 30 : string dirname, bname; // basename is a "poisoned" identifier on some distros
3612 : 30 : size_t slash = value.rfind('/');
3613 [ - + ]: 30 : if (slash == std::string::npos) {
3614 [ # # ]: 0 : dirname = "";
3615 [ # # ]: 0 : bname = value;
3616 : : } else {
3617 [ + - ]: 30 : dirname = value.substr(0, slash);
3618 [ + - ]: 30 : bname = value.substr(slash+1);
3619 : : }
3620 : :
3621 : : // NB: further optimization is possible: replacing the 'glob' op
3622 : : // with simple equality, if the corresponding value segment lacks
3623 : : // metacharacters. sqlite may or may not be smart enough to do so,
3624 : : // so we help out.
3625 [ + - ]: 30 : string metacharacters = "[]*?";
3626 [ + + + + : 56 : string dop = (op == "glob" && dirname.find_first_of(metacharacters) == string::npos) ? "=" : op;
+ - + - ]
3627 [ + + - + : 56 : string bop = (op == "glob" && bname.find_first_of(metacharacters) == string::npos) ? "=" : op;
- - + - ]
3628 : :
3629 : 30 : string sql = string(
3630 : : // explicit query r_de and f_de once here, rather than the query_d and query_e
3631 : : // separately, because they scan the same tables, so we'd double the work
3632 : : "select d1.executable_p, d1.debuginfo_p, 0 as source_p, "
3633 : : " b1.hex, f1d.name || '/' || f1b.name as file, a1.name as archive "
3634 : : "from " BUILDIDS "_r_de d1, " BUILDIDS "_files f1, " BUILDIDS "_fileparts f1b, " BUILDIDS "_fileparts f1d, "
3635 : : BUILDIDS "_buildids b1, " BUILDIDS "_files_v a1 "
3636 : : "where f1.id = d1.content and a1.id = d1.file and d1.buildid = b1.id "
3637 [ + - + - ]: 90 : " and f1d.name " + dop + " ? and f1b.name " + bop + " ? and f1.dirname = f1d.id and f1.basename = f1b.id "
3638 : : "union all \n"
3639 : : "select d2.executable_p, d2.debuginfo_p, 0, "
3640 : : " b2.hex, f2d.name || '/' || f2b.name, NULL "
3641 : : "from " BUILDIDS "_f_de d2, " BUILDIDS "_files f2, " BUILDIDS "_fileparts f2b, " BUILDIDS "_fileparts f2d, "
3642 : : BUILDIDS "_buildids b2 "
3643 : : "where f2.id = d2.file and d2.buildid = b2.id "
3644 [ + - + - ]: 90 : " and f2d.name " + dop + " ? and f2b.name " + bop + " ? "
3645 : 30 : " and f2.dirname = f2d.id and f2.basename = f2b.id");
3646 : :
3647 : : // NB: we could query source file names too, thusly:
3648 : : //
3649 : : // select * from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f1, " BUILDIDS "_r_sref sr
3650 : : // where b.id = sr.buildid and f1.id = sr.artifactsrc and f1.name " + op + "?"
3651 : : // UNION ALL something with BUILDIDS "_f_s"
3652 : : //
3653 : : // But the first part of this query cannot run fast without the same index temp-created
3654 : : // during "maxigroom":
3655 : : // create index " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);
3656 : : // and unfortunately this index is HUGE. It's similar to the size of the _r_sref
3657 : : // table, which is already the largest part of a debuginfod index. Adding that index
3658 : : // would nearly double the .sqlite db size.
3659 : :
3660 [ + - + - : 30 : sqlite_ps *pp = new sqlite_ps (thisdb, "mhd-query-meta-glob", sql);
+ - ]
3661 [ + - ]: 30 : pp->reset();
3662 [ + - ]: 30 : pp->bind(1, dirname);
3663 [ + - ]: 30 : pp->bind(2, bname);
3664 [ + - ]: 30 : pp->bind(3, dirname);
3665 [ + - ]: 30 : pp->bind(4, bname);
3666 : 30 : return pp;
3667 : 30 : }
3668 : :
3669 : : static sqlite_ps*
3670 : 10 : handle_metadata_buildid(sqlite3* thisdb, const string& value)
3671 : : {
3672 : 10 : string sql = string(
3673 : : "select d1.executable_p, d1.debuginfo_p, 0 as source_p, "
3674 : : " b1.hex, f1d.name || '/' || f1b.name as file, a1.name as archive "
3675 : : "from " BUILDIDS "_r_de d1, " BUILDIDS "_files f1, " BUILDIDS "_fileparts f1b, " BUILDIDS "_fileparts f1d, "
3676 : : BUILDIDS "_buildids b1, " BUILDIDS "_files_v a1 "
3677 : : "where f1.id = d1.content and a1.id = d1.file and d1.buildid = b1.id "
3678 : : " and b1.hex = ? and f1.dirname = f1d.id and f1.basename = f1b.id "
3679 : : "union all \n"
3680 : : "select d2.executable_p, d2.debuginfo_p, 0, "
3681 : : " b2.hex, f2d.name || '/' || f2b.name, NULL "
3682 : : "from " BUILDIDS "_f_de d2, " BUILDIDS "_files f2, " BUILDIDS "_fileparts f2b, " BUILDIDS "_fileparts f2d, "
3683 : : BUILDIDS "_buildids b2 "
3684 : : "where f2.id = d2.file and d2.buildid = b2.id "
3685 : : " and b2.hex = ? "
3686 : 10 : " and f2.dirname = f2d.id and f2.basename = f2b.id");
3687 : :
3688 [ + - + - : 10 : sqlite_ps *pp = new sqlite_ps (thisdb, "mhd-query-meta-buildid", sql);
+ - ]
3689 [ + - ]: 10 : pp->reset();
3690 [ + - ]: 10 : pp->bind(1, value); // Bind buildid for the first select (_r_de)
3691 [ + - ]: 10 : pp->bind(2, value); // Bind buildid for the second select (_f_de)
3692 : 10 : return pp;
3693 : 10 : }
3694 : :
3695 : : static struct MHD_Response*
3696 : 40 : handle_metadata (MHD_Connection* conn,
3697 : : string key, string value, off_t* size)
3698 : : {
3699 : 40 : MHD_Response* r;
3700 : : // Because this query can take on the order of many seconds, we need
3701 : : // to prevent DoS against the other normal quick queries, so we use
3702 : : // a dedicated database connection.
3703 : 40 : sqlite3 *thisdb = 0;
3704 : 40 : int rc = sqlite3_open_v2 (db_path.c_str(), &thisdb, (SQLITE_OPEN_READONLY
3705 : : |SQLITE_OPEN_URI
3706 : : |SQLITE_OPEN_PRIVATECACHE
3707 : : |SQLITE_OPEN_NOMUTEX), /* private to us */
3708 : : NULL);
3709 [ - + ]: 40 : if (rc)
3710 [ # # # # ]: 0 : throw sqlite_exception(rc, "cannot open database for metadata query");
3711 : 40 : defer_dtor<sqlite3*,int> sqlite_db_closer (thisdb, sqlite3_close_v2);
3712 : :
3713 : 40 : sqlite_ps *pp = nullptr;
3714 : :
3715 [ + + + + ]: 40 : if (key == "glob" || key == "file") {
3716 [ + - ]: 30 : pp = handle_metadata_glob(thisdb, key, value);
3717 [ + - ]: 10 : } else if (key == "buildid") {
3718 [ + - ]: 10 : pp = handle_metadata_buildid(thisdb, value);
3719 : : } else {
3720 [ # # ]: 0 : throw reportable_exception("/metadata webapi error, unsupported key");
3721 : : }
3722 : :
3723 : 40 : unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
3724 : 40 : pp->reset_timeout(metadata_maxtime_s);
3725 : :
3726 [ + - ]: 40 : json_object *metadata = json_object_new_object();
3727 [ - + - - : 40 : if (!metadata) throw libc_exception(ENOMEM, "json allocation");
- - ]
3728 : 40 : defer_dtor<json_object*,int> metadata_d(metadata, json_object_put);
3729 [ + - ]: 40 : json_object *metadata_arr = json_object_new_array();
3730 [ - + - - : 40 : if (!metadata_arr) throw libc_exception(ENOMEM, "json allocation");
- - ]
3731 [ + - ]: 40 : json_object_object_add(metadata, "results", metadata_arr);
3732 : : // consume all the rows
3733 : :
3734 : 70 : bool metadata_complete = true;
3735 : 70 : while (1)
3736 : : {
3737 [ + - ]: 70 : rc = pp->step_timeout();
3738 [ + + ]: 70 : if (rc == SQLITE_DONE) // success
3739 : : break;
3740 [ + - ]: 30 : if (rc == SQLITE_ABORT || rc == SQLITE_INTERRUPT) // interrupted such as by timeout
3741 : : {
3742 : : metadata_complete = false;
3743 : : break;
3744 : : }
3745 [ - + ]: 30 : if (rc != SQLITE_ROW) // error
3746 [ # # # # ]: 0 : throw sqlite_exception(rc, "step");
3747 : :
3748 [ + - ]: 30 : int m_executable_p = sqlite3_column_int (*pp, 0);
3749 [ + - ]: 30 : int m_debuginfo_p = sqlite3_column_int (*pp, 1);
3750 [ + - ]: 30 : int m_source_p = sqlite3_column_int (*pp, 2);
3751 [ + - - + : 30 : string m_buildid = (const char*) sqlite3_column_text (*pp, 3) ?: ""; // should always be non-null
+ - ]
3752 [ + - - + : 30 : string m_file = (const char*) sqlite3_column_text (*pp, 4) ?: "";
+ - ]
3753 [ + - - + : 30 : string m_archive = (const char*) sqlite3_column_text (*pp, 5) ?: "";
+ - ]
3754 : :
3755 : : // Confirm that m_file matches in the fnmatch(FNM_PATHNAME)
3756 : : // sense, since sqlite's GLOB operator is a looser filter.
3757 [ + + + - : 30 : if (key == "glob" && fnmatch(value.c_str(), m_file.c_str(), FNM_PATHNAME) != 0)
+ - ]
3758 : 0 : continue;
3759 : :
3760 : 60 : auto add_metadata = [metadata_arr, m_buildid, m_file, m_archive](const string& type) {
3761 : 30 : json_object* entry = json_object_new_object();
3762 [ - + - - : 30 : if (NULL == entry) throw libc_exception (ENOMEM, "cannot allocate json");
- - ]
3763 : 30 : defer_dtor<json_object*,int> entry_d(entry, json_object_put);
3764 : :
3765 : 150 : auto add_entry_metadata = [entry](const char* k, string v) {
3766 : 120 : json_object* s;
3767 [ + - ]: 120 : if(v != "") {
3768 : 120 : s = json_object_new_string(v.c_str());
3769 [ - + - - : 120 : if (NULL == s) throw libc_exception (ENOMEM, "cannot allocate json");
- - ]
3770 : 120 : json_object_object_add(entry, k, s);
3771 : : }
3772 : 120 : };
3773 : :
3774 [ + - + - ]: 30 : add_entry_metadata("type", type.c_str());
3775 [ + - + - ]: 30 : add_entry_metadata("buildid", m_buildid);
3776 [ + - + - ]: 30 : add_entry_metadata("file", m_file);
3777 [ + - + - : 60 : if (m_archive != "") add_entry_metadata("archive", m_archive);
+ - ]
3778 [ - + ]: 30 : if (verbose > 3)
3779 [ # # ]: 0 : obatched(clog) << "metadata found local "
3780 : : << json_object_to_json_string_ext(entry,
3781 [ # # # # : 0 : JSON_C_TO_STRING_PRETTY)
# # ]
3782 : 0 : << endl;
3783 : :
3784 : : // Increase ref count to switch its ownership
3785 [ + - + - ]: 30 : json_object_array_add(metadata_arr, json_object_get(entry));
3786 [ + - + - : 60 : };
+ - ]
3787 : :
3788 [ + + + - : 54 : if (m_executable_p) add_metadata("executable");
+ - ]
3789 [ + + + - : 36 : if (m_debuginfo_p) add_metadata("debuginfo");
+ - ]
3790 [ - + - - : 30 : if (m_source_p) add_metadata("source");
- - ]
3791 : 30 : }
3792 [ + - ]: 40 : pp->reset();
3793 : :
3794 [ + - ]: 40 : unsigned num_local_results = json_object_array_length(metadata_arr);
3795 : :
3796 : : // Query upstream as well
3797 [ + - ]: 40 : debuginfod_client *client = debuginfod_pool_begin();
3798 [ + - ]: 40 : if (client != NULL)
3799 : : {
3800 [ + - ]: 40 : add_client_federation_headers(client, conn);
3801 : :
3802 : 40 : int upstream_metadata_fd;
3803 : 40 : char *upstream_metadata_file = NULL;
3804 [ + - ]: 40 : upstream_metadata_fd = debuginfod_find_metadata(client, key.c_str(), (char*)value.c_str(),
3805 : : &upstream_metadata_file);
3806 [ + + ]: 40 : if (upstream_metadata_fd >= 0) {
3807 : : /* json-c >= 0.13 has json_object_from_fd(). */
3808 [ + - ]: 22 : json_object *upstream_metadata_json = json_object_from_file(upstream_metadata_file);
3809 : 22 : free (upstream_metadata_file);
3810 : 22 : json_object *upstream_metadata_json_arr;
3811 : 22 : json_object *upstream_complete;
3812 [ - + ]: 22 : if (NULL != upstream_metadata_json &&
3813 [ + - + - : 44 : json_object_object_get_ex(upstream_metadata_json, "results", &upstream_metadata_json_arr) &&
- + ]
3814 [ + - ]: 22 : json_object_object_get_ex(upstream_metadata_json, "complete", &upstream_complete))
3815 : : {
3816 [ + - ]: 22 : metadata_complete &= json_object_get_boolean(upstream_complete);
3817 [ + - + + ]: 30 : for (int i = 0, n = json_object_array_length(upstream_metadata_json_arr); i < n; i++)
3818 : : {
3819 [ + - ]: 8 : json_object *entry = json_object_array_get_idx(upstream_metadata_json_arr, i);
3820 [ - + ]: 8 : if (verbose > 3)
3821 [ # # ]: 0 : obatched(clog) << "metadata found remote "
3822 : : << json_object_to_json_string_ext(entry,
3823 [ # # # # : 0 : JSON_C_TO_STRING_PRETTY)
# # ]
3824 : 0 : << endl;
3825 : :
3826 [ + - ]: 8 : json_object_get(entry); // increment reference count
3827 [ + - ]: 8 : json_object_array_add(metadata_arr, entry);
3828 : : }
3829 [ + - ]: 22 : json_object_put(upstream_metadata_json);
3830 : : }
3831 [ + - ]: 22 : close(upstream_metadata_fd);
3832 : : }
3833 [ + - ]: 40 : debuginfod_pool_end (client);
3834 : : }
3835 : :
3836 [ + - ]: 40 : unsigned num_total_results = json_object_array_length(metadata_arr);
3837 : :
3838 [ + - ]: 40 : if (verbose > 2)
3839 [ + - + - ]: 120 : obatched(clog) << "metadata found local=" << num_local_results
3840 [ + - + - ]: 40 : << " remote=" << (num_total_results-num_local_results)
3841 [ + - + - : 40 : << " total=" << num_total_results
+ - ]
3842 : 40 : << endl;
3843 : :
3844 [ + - + - ]: 40 : json_object_object_add(metadata, "complete", json_object_new_boolean(metadata_complete));
3845 [ + - ]: 40 : const char* metadata_str = json_object_to_json_string(metadata);
3846 [ - + ]: 40 : if (!metadata_str)
3847 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate json");
3848 [ + - ]: 40 : r = MHD_create_response_from_buffer (strlen(metadata_str),
3849 : : (void*) metadata_str,
3850 : : MHD_RESPMEM_MUST_COPY);
3851 : 40 : *size = strlen(metadata_str);
3852 [ + - ]: 40 : if (r)
3853 [ + - ]: 40 : add_mhd_response_header(r, "Content-Type", "application/json");
3854 : 40 : return r;
3855 : 40 : }
3856 : :
3857 : :
3858 : : static struct MHD_Response*
3859 : 4 : handle_root (off_t* size)
3860 : : {
3861 : 4 : MHD_Response* r;
3862 [ + - ]: 4 : if (cust_homepage_file != "")
3863 : 4 : try
3864 : : {
3865 [ + - ]: 4 : int fd = open (cust_homepage_file.c_str(), O_RDONLY);
3866 [ + + ]: 4 : if (fd != -1) {
3867 : 2 : struct stat buf;
3868 : 2 : stat (cust_homepage_file.c_str(), &buf);
3869 [ + - ]: 2 : r = MHD_create_response_from_fd(buf.st_size, fd);
3870 : : // NB: MHD owns and handles the fd from now. Must not close()!
3871 [ + - ]: 2 : if (r != NULL)
3872 : : {
3873 : 2 : *size = buf.st_size;
3874 [ + - ]: 2 : add_mhd_response_header (r, "Content-Type", "text/html");
3875 : : }
3876 : : } else {
3877 [ + - + - ]: 4 : throw libc_exception (errno, "cannot open file " + cust_homepage_file);
3878 : : }
3879 : 2 : return r;
3880 : : }
3881 [ - + ]: 2 : catch (const reportable_exception& e)
3882 : : {
3883 [ + - ]: 2 : e.report(clog);
3884 : 2 : }
3885 : :
3886 [ + - + - ]: 4 : static string version = "debuginfod (" + string (PACKAGE_NAME) + ") "
3887 [ + - + - : 66 : + string (PACKAGE_VERSION);
+ - + - ]
3888 : 2 : r = MHD_create_response_from_buffer (version.size (),
3889 : 2 : (void *) version.c_str (),
3890 : : MHD_RESPMEM_PERSISTENT);
3891 [ + - ]: 2 : if (r != NULL)
3892 : : {
3893 : 2 : *size = version.size ();
3894 : 2 : add_mhd_response_header (r, "Content-Type", "text/plain");
3895 : : }
3896 : : return r;
3897 : : }
3898 : :
3899 : :
3900 : : static struct MHD_Response*
3901 : 2 : handle_options (off_t* size)
3902 : : {
3903 : 2 : static char empty_body[] = " ";
3904 : 2 : MHD_Response* r = MHD_create_response_from_buffer (1, empty_body,
3905 : : MHD_RESPMEM_PERSISTENT);
3906 [ + - ]: 2 : if (r != NULL)
3907 : : {
3908 : 2 : *size = 1;
3909 : 2 : add_mhd_response_header (r, "Access-Control-Allow-Origin", "*");
3910 : 2 : add_mhd_response_header (r, "Access-Control-Allow-Methods", "GET, OPTIONS");
3911 : 2 : add_mhd_response_header (r, "Access-Control-Allow-Headers", "cache-control");
3912 : : }
3913 : 2 : return r;
3914 : : }
3915 : :
3916 : :
3917 : : ////////////////////////////////////////////////////////////////////////
3918 : :
3919 : :
3920 : : /* libmicrohttpd callback */
3921 : : static MHD_RESULT
3922 : 6790 : handler_cb (void * /*cls*/,
3923 : : struct MHD_Connection *connection,
3924 : : const char *url,
3925 : : const char *method,
3926 : : const char * /*version*/,
3927 : : const char * /*upload_data*/,
3928 : : size_t * /*upload_data_size*/,
3929 : : void ** ptr)
3930 : : {
3931 : 6790 : struct MHD_Response *r = NULL;
3932 : 6790 : string url_copy = url;
3933 : :
3934 : : /* libmicrohttpd always makes (at least) two callbacks: once just
3935 : : past the headers, and one after the request body is finished
3936 : : being received. If we process things early (first callback) and
3937 : : queue a response, libmicrohttpd would suppress http keep-alive
3938 : : (via connection->read_closed = true). */
3939 : 6790 : static int aptr; /* just some random object to use as a flag */
3940 [ + + ]: 6790 : if (&aptr != *ptr)
3941 : : {
3942 : : /* do never respond on first call */
3943 : 3395 : *ptr = &aptr;
3944 : 3395 : return MHD_YES;
3945 : : }
3946 : 3395 : *ptr = NULL; /* reset when done */
3947 : :
3948 [ + - ]: 3395 : const char *maxsize_string = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "X-DEBUGINFOD-MAXSIZE");
3949 : 3394 : long maxsize = 0;
3950 [ + + + - ]: 3394 : if (maxsize_string != NULL && maxsize_string[0] != '\0')
3951 : 2 : maxsize = atol(maxsize_string);
3952 : : else
3953 : : maxsize = 0;
3954 : :
3955 : : #if MHD_VERSION >= 0x00097002
3956 : 3394 : enum MHD_Result rc;
3957 : : #else
3958 : : int rc = MHD_NO; // mhd
3959 : : #endif
3960 : 3394 : int http_code = 500;
3961 : 3394 : off_t http_size = -1;
3962 : 3394 : struct timespec ts_start, ts_end;
3963 : 3394 : clock_gettime (CLOCK_MONOTONIC, &ts_start);
3964 : 3395 : double afteryou = 0.0;
3965 [ + + ]: 3395 : string artifacttype, suffix;
3966 : 3395 : string urlargs; // for logging
3967 : :
3968 : 3395 : try
3969 : : {
3970 [ + + + - : 3395 : if (webapi_cors && method == string("OPTIONS"))
+ + ]
3971 : : {
3972 [ + - + - : 4 : inc_metric("http_requests_total", "type", method);
+ - + - ]
3973 [ + - ]: 2 : r = handle_options(& http_size);
3974 [ + - ]: 2 : rc = MHD_queue_response (connection, MHD_HTTP_OK, r);
3975 : 2 : http_code = MHD_HTTP_OK;
3976 [ + - ]: 2 : MHD_destroy_response (r);
3977 : 2 : return rc;
3978 : : }
3979 [ + - - + ]: 4023 : else if (string(method) != "GET")
3980 [ # # ]: 0 : throw reportable_exception(400, "we support OPTIONS+GET only");
3981 : :
3982 : : /* Start decoding the URL. */
3983 : 3393 : size_t slash1 = url_copy.find('/', 1);
3984 [ + - ]: 3393 : string url1 = url_copy.substr(0, slash1); // ok even if slash1 not found
3985 : :
3986 [ + + - + ]: 3393 : if (slash1 != string::npos && url1 == "/buildid")
3987 : : {
3988 : : // PR27863: block this thread awhile if another thread is already busy
3989 : : // fetching the exact same thing. This is better for Everyone.
3990 : : // The latecomer says "... after you!" and waits.
3991 [ + - + - : 5814 : add_metric ("thread_busy", "role", "http-buildid-after-you", 1);
+ - + - ]
3992 : : #ifdef HAVE_PTHREAD_SETNAME_NP
3993 : 2596 : (void) pthread_setname_np (pthread_self(), "mhd-buildid-after-you");
3994 : : #endif
3995 : 2596 : struct timespec tsay_start, tsay_end;
3996 : 2596 : clock_gettime (CLOCK_MONOTONIC, &tsay_start);
3997 [ + + + - ]: 2656 : static unique_set<string> busy_urls;
3998 [ + - ]: 2596 : unique_set_reserver<string> after_you(busy_urls, url_copy);
3999 : 2596 : clock_gettime (CLOCK_MONOTONIC, &tsay_end);
4000 : 2596 : afteryou = (tsay_end.tv_sec - tsay_start.tv_sec) + (tsay_end.tv_nsec - tsay_start.tv_nsec)/1.e9;
4001 [ + - + - : 5192 : add_metric ("thread_busy", "role", "http-buildid-after-you", -1);
+ - + - ]
4002 : :
4003 [ + - + - : 5192 : tmp_inc_metric m ("thread_busy", "role", "http-buildid");
+ - + - ]
4004 : : #ifdef HAVE_PTHREAD_SETNAME_NP
4005 : 2596 : (void) pthread_setname_np (pthread_self(), "mhd-buildid");
4006 : : #endif
4007 : 2596 : size_t slash2 = url_copy.find('/', slash1+1);
4008 [ - + ]: 2596 : if (slash2 == string::npos)
4009 [ # # ]: 0 : throw reportable_exception("/buildid/ webapi error, need buildid");
4010 : :
4011 [ + - ]: 2596 : string buildid = url_copy.substr(slash1+1, slash2-slash1-1);
4012 : :
4013 : 2596 : size_t slash3 = url_copy.find('/', slash2+1);
4014 : :
4015 [ + + ]: 2596 : if (slash3 == string::npos)
4016 : : {
4017 [ + - ]: 1462 : artifacttype = url_copy.substr(slash2+1);
4018 [ + - ]: 1462 : suffix = "";
4019 : : }
4020 : : else
4021 : : {
4022 [ + - ]: 1134 : artifacttype = url_copy.substr(slash2+1, slash3-slash2-1);
4023 [ + - ]: 1756 : suffix = url_copy.substr(slash3); // include the slash in the suffix
4024 : : }
4025 : :
4026 : : // get the resulting fd so we can report its size
4027 : 2596 : int fd;
4028 [ + + ]: 2596 : r = handle_buildid (connection, buildid, artifacttype, suffix, &fd);
4029 : 1974 : if (r)
4030 : : {
4031 : 1974 : struct stat fs;
4032 [ + - ]: 1974 : if (fstat(fd, &fs) == 0)
4033 : 1974 : http_size = fs.st_size;
4034 : : // libmicrohttpd will close (fd);
4035 : : }
4036 : 3218 : }
4037 [ + + ]: 797 : else if (url1 == "/metrics")
4038 : : {
4039 [ + - + - : 1494 : tmp_inc_metric m ("thread_busy", "role", "http-metrics");
+ - + - ]
4040 [ + - ]: 747 : artifacttype = "metrics";
4041 [ + - + - : 1494 : inc_metric("http_requests_total", "type", artifacttype);
+ - ]
4042 [ + - ]: 747 : r = handle_metrics(& http_size);
4043 : 747 : }
4044 [ + + ]: 50 : else if (url1 == "/metadata")
4045 : : {
4046 [ + - + - : 80 : tmp_inc_metric m ("thread_busy", "role", "http-metadata");
+ - + - ]
4047 [ + - ]: 40 : const char* key = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "key");
4048 [ + - ]: 40 : const char* value = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "value");
4049 [ - + ]: 40 : if (NULL == value || NULL == key)
4050 [ # # ]: 0 : throw reportable_exception("/metadata webapi error, need key and value");
4051 : :
4052 [ + - + - : 40 : urlargs = string("?key=") + string(key) + string("&value=") + string(value); // apprx., for logging
+ - + - +
- + - +
- ]
4053 [ + - ]: 40 : artifacttype = "metadata";
4054 [ + - + - : 80 : inc_metric("http_requests_total", "type", artifacttype);
+ - ]
4055 [ + - + - : 40 : r = handle_metadata(connection, key, value, &http_size);
+ - ]
4056 : 40 : }
4057 [ + + ]: 10 : else if (url1 == "/")
4058 : : {
4059 [ + - ]: 4 : artifacttype = "/";
4060 [ + - + - : 638 : inc_metric("http_requests_total", "type", artifacttype);
+ - ]
4061 [ + - ]: 4 : r = handle_root(& http_size);
4062 : : }
4063 : : else
4064 [ + - + - ]: 12 : throw reportable_exception("webapi error, unrecognized '" + url1 + "'");
4065 : :
4066 [ - + ]: 2765 : if (r == 0)
4067 [ # # ]: 0 : throw reportable_exception("internal error, missing response");
4068 : :
4069 [ + + + - ]: 2765 : if (maxsize > 0 && http_size > maxsize)
4070 : : {
4071 [ + - ]: 2 : MHD_destroy_response(r);
4072 [ + - + - ]: 4 : throw reportable_exception(406, "File too large, max size=" + std::to_string(maxsize));
4073 : : }
4074 : :
4075 [ + + ]: 2763 : if (webapi_cors)
4076 : : // add ACAO header for all successful requests
4077 [ + - ]: 136 : add_mhd_response_header (r, "Access-Control-Allow-Origin", "*");
4078 [ + + + + ]: 2763 : if ((cust_homepage_redirect) != "" && (url1 == "/"))
4079 : : {
4080 : : // redirect to given custom --homepage
4081 [ + - ]: 4 : MHD_add_response_header(r, "Location", cust_homepage_redirect.c_str());
4082 [ + - ]: 4 : rc = MHD_queue_response (connection, MHD_HTTP_FOUND, r);
4083 : : http_code = MHD_HTTP_FOUND;
4084 : : }
4085 : : else
4086 : : {
4087 [ + - ]: 2759 : rc = MHD_queue_response (connection, MHD_HTTP_OK, r);
4088 : : http_code = MHD_HTTP_OK;
4089 : : }
4090 [ + - ]: 2763 : MHD_destroy_response (r);
4091 : 3393 : }
4092 [ - + ]: 630 : catch (const reportable_exception& e)
4093 : : {
4094 [ + - + - : 1260 : inc_metric("http_responses_total","result","error");
+ - + - ]
4095 [ + - ]: 630 : e.report(clog);
4096 : 630 : http_code = e.code;
4097 [ + - ]: 630 : http_size = e.message.size();
4098 [ + - ]: 630 : rc = e.mhd_send_response (connection);
4099 : 630 : }
4100 : :
4101 : 3393 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
4102 : 3393 : double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
4103 : : // afteryou: delay waiting for other client's identical query to complete
4104 : : // deltas: total latency, including afteryou waiting
4105 [ + - + - ]: 6786 : obatched(clog) << conninfo(connection)
4106 : : << ' ' << method << ' ' << url << urlargs
4107 [ + - + - : 3393 : << ' ' << http_code << ' ' << http_size
+ - + - +
- + - + -
+ - ]
4108 [ + - + - : 3393 : << ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
+ - + - +
- + - +
- ]
4109 : 3393 : << endl;
4110 : :
4111 : : // related prometheus metrics
4112 : 3393 : string http_code_str = to_string(http_code);
4113 [ + - + - : 6786 : add_metric("http_responses_transfer_bytes_sum",
+ - + - ]
4114 : : "code", http_code_str, "type", artifacttype, http_size);
4115 [ + - + - : 6786 : inc_metric("http_responses_transfer_bytes_count",
+ - + - ]
4116 : : "code", http_code_str, "type", artifacttype);
4117 : :
4118 [ + - + - : 6786 : add_metric("http_responses_duration_milliseconds_sum",
+ - + - ]
4119 : : "code", http_code_str, "type", artifacttype, deltas*1000); // prometheus prefers _seconds and floating point
4120 [ + - + - : 6786 : inc_metric("http_responses_duration_milliseconds_count",
+ - + - ]
4121 : : "code", http_code_str, "type", artifacttype);
4122 : :
4123 [ + - + - : 6786 : add_metric("http_responses_after_you_milliseconds_sum",
+ - + - ]
4124 : : "code", http_code_str, "type", artifacttype, afteryou*1000);
4125 [ + - + - : 6786 : inc_metric("http_responses_after_you_milliseconds_count",
+ - + - ]
4126 : : "code", http_code_str, "type", artifacttype);
4127 : :
4128 : 3393 : return rc;
4129 : 13578 : }
4130 : :
4131 : :
4132 : : ////////////////////////////////////////////////////////////////////////
4133 : : // borrowed originally from src/nm.c get_local_names()
4134 : :
4135 : : static void
4136 : 411 : dwarf_extract_source_paths (Elf *elf, set<string>& debug_sourcefiles)
4137 : : noexcept // no exceptions - so we can simplify the altdbg resource release at end
4138 : : {
4139 : 411 : Dwarf* dbg = dwarf_begin_elf (elf, DWARF_C_READ, NULL);
4140 [ - + ]: 412 : if (dbg == NULL)
4141 : 0 : return;
4142 : :
4143 : 412 : Dwarf* altdbg = NULL;
4144 : 412 : int altdbg_fd = -1;
4145 : :
4146 : : // DWZ handling: if we have an unsatisfied debug-alt-link, add an
4147 : : // empty string into the outgoing sourcefiles set, so the caller
4148 : : // should know that our data is incomplete.
4149 : 412 : const char *alt_name_p;
4150 : 412 : const void *alt_build_id; // elfutils-owned memory
4151 : 412 : ssize_t sz = dwelf_dwarf_gnu_debugaltlink (dbg, &alt_name_p, &alt_build_id);
4152 [ + + ]: 412 : if (sz > 0) // got one!
4153 : : {
4154 : 200 : string buildid;
4155 : 200 : unsigned char* build_id_bytes = (unsigned char*) alt_build_id;
4156 [ + + ]: 4200 : for (ssize_t idx=0; idx<sz; idx++)
4157 : : {
4158 : 4000 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
4159 : 4000 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
4160 : : }
4161 : :
4162 [ + + ]: 200 : if (verbose > 3)
4163 : 156 : obatched(clog) << "Need altdebug buildid=" << buildid << endl;
4164 : :
4165 : : // but is it unsatisfied the normal elfutils ways?
4166 : 200 : Dwarf* alt = dwarf_getalt (dbg);
4167 [ + - ]: 200 : if (alt == NULL)
4168 : : {
4169 : : // Yup, unsatisfied the normal way. Maybe we can satisfy it
4170 : : // from our own debuginfod database.
4171 : 200 : int alt_fd;
4172 : 200 : struct MHD_Response *r = 0;
4173 : 200 : try
4174 : : {
4175 [ + - ]: 200 : string artifacttype = "debuginfo";
4176 [ + - + + ]: 220 : r = handle_buildid (0, buildid, artifacttype, "", &alt_fd);
4177 : : // NB: no need for ACAO etc. headers; this is not getting sent to a client
4178 : 20 : }
4179 [ - + ]: 20 : catch (const reportable_exception& e)
4180 : : {
4181 : : // swallow exceptions
4182 : 20 : }
4183 : :
4184 : : // NB: this is not actually recursive! This invokes the web-query
4185 : : // path, which cannot get back into the scan code paths.
4186 : 200 : if (r)
4187 : : {
4188 : : // Found it!
4189 : 180 : altdbg_fd = dup(alt_fd); // ok if this fails, downstream failures ok
4190 : 180 : alt = altdbg = dwarf_begin (altdbg_fd, DWARF_C_READ);
4191 : : // NB: must close this dwarf and this fd at the bottom of the function!
4192 : 180 : MHD_destroy_response (r); // will close alt_fd
4193 [ - + ]: 180 : if (alt)
4194 : 180 : dwarf_setalt (dbg, alt);
4195 : : }
4196 : : }
4197 : : else
4198 : : {
4199 : : // NB: dwarf_setalt(alt) inappropriate - already done!
4200 : : // NB: altdbg will stay 0 so nothing tries to redundantly dealloc.
4201 : : }
4202 : :
4203 [ + + ]: 200 : if (alt)
4204 : : {
4205 [ + + ]: 180 : if (verbose > 3)
4206 : 156 : obatched(clog) << "Resolved altdebug buildid=" << buildid << endl;
4207 : : }
4208 : : else // (alt == NULL) - signal possible presence of poor debuginfo
4209 : : {
4210 : 20 : debug_sourcefiles.insert("");
4211 [ + - ]: 20 : if (verbose > 3)
4212 : 0 : obatched(clog) << "Unresolved altdebug buildid=" << buildid << endl;
4213 : : }
4214 : 200 : }
4215 : :
4216 : 412 : Dwarf_Off offset = 0;
4217 : 412 : Dwarf_Off old_offset;
4218 : 412 : size_t hsize;
4219 : :
4220 [ + + ]: 9937 : while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
4221 : : {
4222 : 9526 : Dwarf_Die cudie_mem;
4223 : 9526 : Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
4224 : :
4225 [ - + ]: 9528 : if (cudie == NULL)
4226 : 36 : continue;
4227 [ + + ]: 9528 : if (dwarf_tag (cudie) != DW_TAG_compile_unit)
4228 : 36 : continue;
4229 : :
4230 [ - + ]: 9492 : const char *cuname = dwarf_diename(cudie) ?: "unknown";
4231 : :
4232 : 9492 : Dwarf_Files *files;
4233 : 9492 : size_t nfiles;
4234 [ - + ]: 9492 : if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
4235 : 0 : continue;
4236 : :
4237 : : // extract DW_AT_comp_dir to resolve relative file names
4238 : 9492 : const char *comp_dir = "";
4239 : 9492 : const char *const *dirs;
4240 : 9492 : size_t ndirs;
4241 [ - + ]: 9492 : if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 &&
4242 [ - + ]: 9492 : dirs[0] != NULL)
4243 : : comp_dir = dirs[0];
4244 : : if (comp_dir == NULL)
4245 : : comp_dir = "";
4246 : :
4247 [ + + ]: 9492 : if (verbose > 3)
4248 : 14819 : obatched(clog) << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir
4249 : 7409 : << " #files=" << nfiles << " #dirs=" << ndirs << endl;
4250 : :
4251 [ - + - - ]: 9491 : if (comp_dir[0] == '\0' && cuname[0] != '/')
4252 : : {
4253 [ # # ]: 0 : if (verbose > 3)
4254 : 0 : obatched(clog) << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
4255 : 0 : continue;
4256 : : }
4257 : :
4258 [ + + ]: 155662 : for (size_t f = 1; f < nfiles; f++)
4259 : : {
4260 : 146173 : const char *hat = dwarf_filesrc (files, f, NULL, NULL);
4261 [ - + ]: 146211 : if (hat == NULL)
4262 : 1768 : continue;
4263 : :
4264 [ + + ]: 146368 : if (string(hat) == "<built-in>"
4265 [ + + + + ]: 292485 : || string_endswith(hat, "<built-in>")) // gcc intrinsics, don't bother record
4266 : 1768 : continue;
4267 : :
4268 [ + + ]: 144600 : string waldo;
4269 [ + + ]: 144600 : if (hat[0] == '/') // absolute
4270 : 90531 : waldo = (string (hat));
4271 [ + - ]: 54069 : else if (comp_dir[0] != '\0') // comp_dir relative
4272 : 54069 : waldo = (string (comp_dir) + string("/") + string (hat));
4273 : : else
4274 : : {
4275 [ # # ]: 0 : if (verbose > 3)
4276 : 0 : obatched(clog) << "skipping hat=" << hat << " due to empty comp_dir" << endl;
4277 : 0 : continue;
4278 : : }
4279 : :
4280 : : // NB: this is the 'waldo' that a dbginfo client will have
4281 : : // to supply for us to give them the file The comp_dir
4282 : : // prefixing is a definite complication. Otherwise we'd
4283 : : // have to return a setof comp_dirs (one per CU!) with
4284 : : // corresponding filesrc[] names, instead of one absolute
4285 : : // resoved set. Maybe we'll have to do that anyway. XXX
4286 : :
4287 [ + + ]: 144654 : if (verbose > 4)
4288 [ - + ]: 32 : obatched(clog) << waldo
4289 [ - + ]: 16 : << (debug_sourcefiles.find(waldo)==debug_sourcefiles.end() ? " new" : " dup") << endl;
4290 : :
4291 : 144654 : debug_sourcefiles.insert (waldo);
4292 : 144403 : }
4293 : : }
4294 : :
4295 : 412 : dwarf_end(dbg);
4296 [ + + ]: 412 : if (altdbg)
4297 : 180 : dwarf_end(altdbg);
4298 [ + + ]: 412 : if (altdbg_fd >= 0)
4299 : 180 : close(altdbg_fd);
4300 : : }
4301 : :
4302 : :
4303 : :
4304 : : static void
4305 : 1708 : elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, set<string>& debug_sourcefiles)
4306 : : {
4307 : 1708 : Elf *elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL);
4308 [ + - ]: 1708 : if (elf == NULL)
4309 : : return;
4310 : :
4311 : 1708 : try // catch our types of errors and clean up the Elf* object
4312 : : {
4313 [ + - + + ]: 1708 : if (elf_kind (elf) != ELF_K_ELF)
4314 : : {
4315 [ + - ]: 906 : elf_end (elf);
4316 : 946 : return;
4317 : : }
4318 : :
4319 : 802 : GElf_Ehdr ehdr_storage;
4320 [ + - ]: 802 : GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
4321 [ - + ]: 802 : if (ehdr == NULL)
4322 : : {
4323 [ # # ]: 0 : elf_end (elf);
4324 : : return;
4325 : : }
4326 : 802 : auto elf_type = ehdr->e_type;
4327 : :
4328 : 802 : const void *build_id; // elfutils-owned memory
4329 [ + - ]: 802 : ssize_t sz = dwelf_elf_gnu_build_id (elf, & build_id);
4330 [ + + ]: 800 : if (sz <= 0)
4331 : : {
4332 : : // It's not a diagnostic-worthy error for an elf file to lack build-id.
4333 : : // It might just be very old.
4334 [ + - ]: 40 : elf_end (elf);
4335 : : return;
4336 : : }
4337 : :
4338 : : // build_id is a raw byte array; convert to hexadecimal *lowercase*
4339 : 760 : unsigned char* build_id_bytes = (unsigned char*) build_id;
4340 [ + + ]: 15939 : for (ssize_t idx=0; idx<sz; idx++)
4341 : : {
4342 [ + - ]: 15179 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
4343 [ + - ]: 30379 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
4344 : : }
4345 : :
4346 : : // now decide whether it's an executable - namely, any allocatable section has
4347 : : // PROGBITS;
4348 [ + + ]: 760 : if (elf_type == ET_EXEC || elf_type == ET_DYN)
4349 : : {
4350 : 699 : size_t shnum;
4351 [ + - ]: 699 : int rc = elf_getshdrnum (elf, &shnum);
4352 [ - + ]: 699 : if (rc < 0)
4353 [ # # # # ]: 0 : throw elfutils_exception(rc, "getshdrnum");
4354 : :
4355 : 699 : executable_p = false;
4356 [ + + ]: 13326 : for (size_t sc = 0; sc < shnum; sc++)
4357 : : {
4358 [ + - ]: 13002 : Elf_Scn *scn = elf_getscn (elf, sc);
4359 [ - + ]: 12996 : if (scn == NULL)
4360 : 0 : continue;
4361 : :
4362 : 12996 : GElf_Shdr shdr_mem;
4363 [ + - ]: 12996 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
4364 [ - + ]: 13001 : if (shdr == NULL)
4365 : 0 : continue;
4366 : :
4367 : : // allocated (loadable / vm-addr-assigned) section with available content?
4368 [ + + + + ]: 13001 : if ((shdr->sh_type == SHT_PROGBITS) && (shdr->sh_flags & SHF_ALLOC))
4369 : : {
4370 [ - + ]: 374 : if (verbose > 4)
4371 [ # # # # : 0 : obatched(clog) << "executable due to SHF_ALLOC SHT_PROGBITS sc=" << sc << endl;
# # ]
4372 : 374 : executable_p = true;
4373 : 374 : break; // no need to keep looking for others
4374 : : }
4375 : : } // iterate over sections
4376 : : } // executable_p classification
4377 : :
4378 : : // now decide whether it's a debuginfo - namely, if it has any .debug* or .zdebug* sections
4379 : : // logic mostly stolen from fweimer@redhat.com's elfclassify drafts
4380 : 759 : size_t shstrndx;
4381 [ + - ]: 759 : int rc = elf_getshdrstrndx (elf, &shstrndx);
4382 [ - + ]: 760 : if (rc < 0)
4383 [ # # # # ]: 0 : throw elfutils_exception(rc, "getshdrstrndx");
4384 : :
4385 : : Elf_Scn *scn = NULL;
4386 : : bool symtab_p = false;
4387 : : bool bits_alloc_p = false;
4388 : 40868 : while (true)
4389 : : {
4390 [ + - ]: 20814 : scn = elf_nextscn (elf, scn);
4391 [ + + ]: 20687 : if (scn == NULL)
4392 : : break;
4393 : 20337 : GElf_Shdr shdr_storage;
4394 [ + - ]: 20337 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
4395 [ + - ]: 20337 : if (shdr == NULL)
4396 : : break;
4397 [ + - ]: 20337 : const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
4398 [ + - ]: 20465 : if (section_name == NULL)
4399 : : break;
4400 [ + + ]: 20465 : if (startswith (section_name, ".debug_line") ||
4401 [ - + ]: 20054 : startswith (section_name, ".zdebug_line"))
4402 : : {
4403 : 411 : debuginfo_p = true;
4404 [ + - ]: 411 : if (scan_source_info)
4405 : 411 : dwarf_extract_source_paths (elf, debug_sourcefiles);
4406 : : break; // expecting only one .*debug_line, so no need to look for others
4407 : : }
4408 [ + + ]: 20054 : else if (startswith (section_name, ".debug_") ||
4409 [ + - ]: 18691 : startswith (section_name, ".zdebug_"))
4410 : : {
4411 : 1248 : debuginfo_p = true;
4412 : : // NB: don't break; need to parse .debug_line for sources
4413 : : }
4414 [ + + ]: 18806 : else if (shdr->sh_type == SHT_SYMTAB)
4415 : : {
4416 : : symtab_p = true;
4417 : : }
4418 : 18782 : else if (shdr->sh_type != SHT_NOBITS
4419 [ + + ]: 18782 : && shdr->sh_type != SHT_NOTE
4420 [ + + ]: 9190 : && (shdr->sh_flags & SHF_ALLOC) != 0)
4421 : : {
4422 : 20054 : bits_alloc_p = true;
4423 : : }
4424 : 20054 : }
4425 : :
4426 : : // For more expansive elf/split-debuginfo classification, we
4427 : : // want to identify as debuginfo "strip -s"-produced files
4428 : : // without .debug_info* (like libicudata), but we don't want to
4429 : : // identify "strip -g" executables (with .symtab left there).
4430 [ - + ]: 762 : if (symtab_p && !bits_alloc_p)
4431 : 0 : debuginfo_p = true;
4432 : : }
4433 [ # # ]: 0 : catch (const reportable_exception& e)
4434 : : {
4435 [ # # ]: 0 : e.report(clog);
4436 : 0 : }
4437 : 762 : elf_end (elf);
4438 : : }
4439 : :
4440 : :
4441 : : // Intern the given file name in two parts (dirname & basename) and
4442 : : // return the resulting file's id.
4443 : : static int64_t
4444 : 35641 : register_file_name(sqlite_ps& ps_upsert_fileparts,
4445 : : sqlite_ps& ps_upsert_file,
4446 : : sqlite_ps& ps_lookup_file,
4447 : : const string& name)
4448 : : {
4449 : 35641 : std::size_t slash = name.rfind('/');
4450 [ + + ]: 35645 : string dirname, filename;
4451 [ + + ]: 35645 : if (slash == std::string::npos)
4452 : : {
4453 [ + - ]: 90 : dirname = "";
4454 [ + - ]: 90 : filename = name;
4455 : : }
4456 : : else
4457 : : {
4458 [ + - ]: 35555 : dirname = name.substr(0, slash);
4459 [ + - ]: 35560 : filename = name.substr(slash+1);
4460 : : }
4461 : : // NB: see also handle_metadata()
4462 : :
4463 : : // intern the two substrings
4464 : 35648 : ps_upsert_fileparts
4465 [ + - ]: 35648 : .reset()
4466 [ + - ]: 35646 : .bind(1, dirname)
4467 [ + - ]: 35610 : .step_ok_done();
4468 : 35650 : ps_upsert_fileparts
4469 [ + - ]: 35650 : .reset()
4470 [ + - ]: 35650 : .bind(1, filename)
4471 [ + - ]: 35647 : .step_ok_done();
4472 : :
4473 : : // intern the tuple
4474 : 35650 : ps_upsert_file
4475 [ + - ]: 35650 : .reset()
4476 [ + - ]: 35650 : .bind(1, dirname)
4477 [ + - ]: 35643 : .bind(2, filename)
4478 [ + - ]: 35646 : .step_ok_done();
4479 : :
4480 : : // look up the tuple's id
4481 : 35650 : ps_lookup_file
4482 [ + - ]: 35650 : .reset()
4483 [ + - ]: 35650 : .bind(1, dirname)
4484 [ + - ]: 35646 : .bind(2, filename);
4485 [ + - ]: 35647 : int rc = ps_lookup_file.step();
4486 [ - + - - : 35650 : if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step");
- - ]
4487 : :
4488 [ + - ]: 35650 : int64_t id = sqlite3_column_int64 (ps_lookup_file, 0);
4489 [ + - ]: 35647 : ps_lookup_file.reset();
4490 : 35650 : return id;
4491 : 35650 : }
4492 : :
4493 : :
4494 : :
4495 : : static void
4496 : 1106 : scan_source_file (const string& rps, const stat_t& st,
4497 : : sqlite_ps& ps_upsert_buildids,
4498 : : sqlite_ps& ps_upsert_fileparts,
4499 : : sqlite_ps& ps_upsert_file,
4500 : : sqlite_ps& ps_lookup_file,
4501 : : sqlite_ps& ps_upsert_de,
4502 : : sqlite_ps& ps_upsert_s,
4503 : : sqlite_ps& ps_query,
4504 : : sqlite_ps& ps_scan_done,
4505 : : unsigned& fts_cached,
4506 : : unsigned& fts_executable,
4507 : : unsigned& fts_debuginfo,
4508 : : unsigned& fts_sourcefiles)
4509 : : {
4510 : 1106 : int64_t fileid = register_file_name(ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, rps);
4511 : :
4512 : : /* See if we know of it already. */
4513 : 1106 : int rc = ps_query
4514 : 1106 : .reset()
4515 : 1106 : .bind(1, fileid)
4516 : 1106 : .bind(2, st.st_mtime)
4517 : 1106 : .step();
4518 : 1106 : ps_query.reset();
4519 [ + + ]: 1106 : if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
4520 : : // no need to recheck a file/version we already know
4521 : : // specifically, no need to elf-begin a file we already determined is non-elf
4522 : : // (so is stored with buildid=NULL)
4523 : : {
4524 : 438 : fts_cached++;
4525 : 438 : return;
4526 : : }
4527 : :
4528 : 668 : bool executable_p = false, debuginfo_p = false; // E and/or D
4529 [ + - ]: 668 : string buildid;
4530 [ + - ]: 668 : set<string> sourcefiles;
4531 : :
4532 [ + - ]: 668 : int fd = open (rps.c_str(), O_RDONLY);
4533 : 668 : try
4534 : : {
4535 [ + - ]: 668 : if (fd >= 0)
4536 [ + - ]: 668 : elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
4537 : : else
4538 [ # # # # : 0 : throw libc_exception(errno, string("open ") + rps);
# # ]
4539 [ + - + - : 1336 : add_metric ("scanned_bytes_total","source","file",
+ - ]
4540 [ + - ]: 668 : st.st_size);
4541 [ + - + - : 1336 : inc_metric ("scanned_files_total","source","file");
+ - + - ]
4542 : : }
4543 : : // NB: we catch exceptions here too, so that we can
4544 : : // cache the corrupt-elf case (!executable_p &&
4545 : : // !debuginfo_p) just below, just as if we had an
4546 : : // EPERM error from open(2).
4547 [ - - ]: 0 : catch (const reportable_exception& e)
4548 : : {
4549 [ - - ]: 0 : e.report(clog);
4550 : 0 : }
4551 : :
4552 [ + - ]: 668 : if (fd >= 0)
4553 [ + - ]: 668 : close (fd);
4554 : :
4555 [ + + ]: 668 : if (buildid == "")
4556 : : {
4557 : : // no point storing an elf file without buildid
4558 : 572 : executable_p = false;
4559 : 572 : debuginfo_p = false;
4560 : : }
4561 : : else
4562 : : {
4563 : : // register this build-id in the interning table
4564 : 96 : ps_upsert_buildids
4565 [ + - ]: 96 : .reset()
4566 [ + - ]: 96 : .bind(1, buildid)
4567 [ + - ]: 96 : .step_ok_done();
4568 : : }
4569 : :
4570 [ + + ]: 668 : if (executable_p)
4571 : 72 : fts_executable ++;
4572 [ + + ]: 668 : if (debuginfo_p)
4573 : 72 : fts_debuginfo ++;
4574 [ + + + + ]: 668 : if (executable_p || debuginfo_p)
4575 : : {
4576 : 96 : ps_upsert_de
4577 [ + - ]: 96 : .reset()
4578 [ + - ]: 96 : .bind(1, buildid)
4579 [ + + + - ]: 120 : .bind(2, debuginfo_p ? 1 : 0)
4580 [ + + + - ]: 120 : .bind(3, executable_p ? 1 : 0)
4581 [ + - ]: 96 : .bind(4, fileid)
4582 [ + - ]: 96 : .bind(5, st.st_mtime)
4583 [ + - ]: 96 : .step_ok_done();
4584 : : }
4585 [ + + ]: 668 : if (executable_p)
4586 [ + - + - : 144 : inc_metric("found_executable_total","source","files");
+ - + - ]
4587 [ + + ]: 668 : if (debuginfo_p)
4588 [ + - + - : 144 : inc_metric("found_debuginfo_total","source","files");
+ - + - ]
4589 : :
4590 [ + + + - ]: 668 : if (sourcefiles.size() && buildid != "")
4591 : : {
4592 : 72 : fts_sourcefiles += sourcefiles.size();
4593 : :
4594 [ + + ]: 16204 : for (auto&& dwarfsrc : sourcefiles)
4595 : : {
4596 [ + - ]: 16132 : char *srp = realpath(dwarfsrc.c_str(), NULL);
4597 [ - + ]: 16132 : if (srp == NULL) // also if DWZ unresolved dwarfsrc=""
4598 : 0 : continue; // unresolvable files are not a serious problem
4599 : : // throw libc_exception(errno, "fts/file realpath " + srcpath);
4600 [ + - ]: 16132 : string srps = string(srp);
4601 : 16132 : free (srp);
4602 : :
4603 : 16132 : struct stat sfs;
4604 : 16132 : rc = stat(srps.c_str(), &sfs);
4605 [ - + ]: 16132 : if (rc != 0)
4606 : 0 : continue;
4607 : :
4608 [ + - ]: 16132 : if (verbose > 2)
4609 [ + - + - ]: 48396 : obatched(clog) << "recorded buildid=" << buildid << " file=" << srps
4610 [ + - + - : 16132 : << " mtime=" << sfs.st_mtime
+ - + - ]
4611 [ + - + - : 16131 : << " as source " << dwarfsrc << endl;
+ - ]
4612 : :
4613 : : // PR25548: store canonicalized dwarfsrc path
4614 [ + - ]: 16132 : string dwarfsrc_canon = canon_pathname (dwarfsrc);
4615 [ + + ]: 16132 : if (dwarfsrc_canon != dwarfsrc)
4616 : : {
4617 [ + + ]: 3110 : if (verbose > 3)
4618 [ + - + - : 4912 : obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+ - + - +
- ]
4619 : : }
4620 : :
4621 [ + - ]: 16132 : int64_t fileid1 = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, dwarfsrc_canon);
4622 [ + - ]: 16132 : int64_t fileid2 = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, srps);
4623 : :
4624 : 16132 : ps_upsert_s
4625 [ + - ]: 16132 : .reset()
4626 [ + - ]: 16132 : .bind(1, buildid)
4627 [ + - ]: 16126 : .bind(2, fileid1)
4628 [ + - ]: 16124 : .bind(3, fileid2)
4629 [ + - ]: 16128 : .bind(4, sfs.st_mtime)
4630 [ + - ]: 16128 : .step_ok_done();
4631 : :
4632 [ + - + - : 32264 : inc_metric("found_sourcerefs_total","source","files");
+ - + - ]
4633 : 16132 : }
4634 : : }
4635 : :
4636 : 668 : ps_scan_done
4637 [ + - ]: 668 : .reset()
4638 [ + - ]: 668 : .bind(1, fileid)
4639 [ + - ]: 668 : .bind(2, st.st_mtime)
4640 [ + - ]: 668 : .bind(3, st.st_size)
4641 [ + - ]: 668 : .step_ok_done();
4642 : :
4643 [ + - ]: 668 : if (verbose > 2)
4644 [ + - + - ]: 2004 : obatched(clog) << "recorded buildid=" << buildid << " file=" << rps
4645 [ + - + - : 668 : << " mtime=" << st.st_mtime << " atype="
+ - + - ]
4646 : : << (executable_p ? "E" : "")
4647 [ + - + + : 1860 : << (debuginfo_p ? "D" : "") << endl;
+ - + + +
- + - ]
4648 : 668 : }
4649 : :
4650 : :
4651 : :
4652 : :
4653 : :
4654 : : // Analyze given archive file of given age; record buildids / exec/debuginfo-ness of its
4655 : : // constituent files with given upsert statements.
4656 : : static void
4657 : 396 : archive_classify (const string& rps, string& archive_extension, int64_t archiveid,
4658 : : sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_fileparts, sqlite_ps& ps_upsert_file,
4659 : : sqlite_ps& ps_lookup_file,
4660 : : sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef,
4661 : : sqlite_ps& ps_upsert_seekable,
4662 : : time_t mtime,
4663 : : unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef,
4664 : : bool& fts_sref_complete_p)
4665 : : {
4666 : 396 : string archive_decoder = "/dev/null";
4667 [ + + ]: 1024 : for (auto&& arch : scan_archives)
4668 [ + + ]: 628 : if (string_endswith(rps, arch.first))
4669 : : {
4670 [ + - ]: 396 : archive_extension = arch.first;
4671 [ + - ]: 1024 : archive_decoder = arch.second;
4672 : : }
4673 : :
4674 : 396 : FILE* fp;
4675 : 396 : defer_dtor<FILE*,int>::dtor_fn dfn;
4676 [ + + ]: 396 : if (archive_decoder != "cat")
4677 : : {
4678 [ + - + - : 80 : string popen_cmd = archive_decoder + " " + shell_escape(rps);
+ - ]
4679 [ + - ]: 40 : fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
4680 : 40 : dfn = pclose;
4681 [ - + ]: 40 : if (fp == NULL)
4682 [ # # # # : 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
# # ]
4683 : 40 : }
4684 : : else
4685 : : {
4686 [ + - ]: 356 : fp = fopen (rps.c_str(), "r");
4687 : 356 : dfn = fclose;
4688 [ - + ]: 356 : if (fp == NULL)
4689 [ # # # # : 0 : throw libc_exception (errno, string("fopen ") + rps);
# # ]
4690 : : }
4691 : 396 : defer_dtor<FILE*,int> fp_closer (fp, dfn);
4692 : :
4693 : 396 : struct archive *a;
4694 [ + - ]: 396 : a = archive_read_new();
4695 [ - + ]: 396 : if (a == NULL)
4696 [ # # # # ]: 0 : throw archive_exception("cannot create archive reader");
4697 : 396 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
4698 : :
4699 [ + - ]: 396 : int rc = archive_read_support_format_all(a);
4700 [ - + ]: 396 : if (rc != ARCHIVE_OK)
4701 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all formats");
4702 [ + - ]: 396 : rc = archive_read_support_filter_all(a);
4703 [ - + ]: 395 : if (rc != ARCHIVE_OK)
4704 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all filters");
4705 : :
4706 [ + - ]: 395 : rc = archive_read_open_FILE (a, fp);
4707 [ - + ]: 396 : if (rc != ARCHIVE_OK)
4708 : : {
4709 [ # # # # : 0 : obatched(clog) << "cannot open archive from pipe " << rps << endl;
# # ]
4710 [ # # # # ]: 0 : throw archive_exception(a, "cannot open archive from pipe");
4711 : : }
4712 : :
4713 [ + + ]: 396 : if (verbose > 3)
4714 [ + - + - : 712 : obatched(clog) << "libarchive scanning " << rps << " id " << archiveid << endl;
+ - + - +
- ]
4715 : :
4716 [ + - ]: 396 : bool seekable = is_seekable_archive (rps, a);
4717 [ + - + + ]: 396 : if (verbose> 2 && seekable)
4718 [ + - + - : 64 : obatched(clog) << rps << " is seekable" << endl;
+ - ]
4719 : :
4720 : : bool any_exceptions = false;
4721 : 3773 : while(1) // parse archive entries
4722 : : {
4723 [ + - ]: 3773 : if (interrupted)
4724 : : break;
4725 : :
4726 : 3773 : try
4727 : : {
4728 : 3773 : struct archive_entry *e;
4729 [ + - ]: 3773 : rc = archive_read_next_header (a, &e);
4730 [ + + ]: 3774 : if (rc != ARCHIVE_OK)
4731 : : break;
4732 : :
4733 [ + - + + ]: 3378 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
4734 : 2337 : continue;
4735 : :
4736 [ + - ]: 1040 : string fn = canonicalized_archive_entry_pathname (e);
4737 : :
4738 [ + + ]: 1040 : if (verbose > 3)
4739 [ + - + - : 1756 : obatched(clog) << "libarchive checking " << fn << endl;
+ - ]
4740 : :
4741 [ + - ]: 1040 : int64_t seekable_size = archive_entry_size (e);
4742 [ + - ]: 1040 : int64_t seekable_offset = archive_filter_bytes (a, 0);
4743 [ + - ]: 1040 : time_t seekable_mtime = archive_entry_mtime (e);
4744 : :
4745 : : // extract this file to a temporary file
4746 : 1040 : char* tmppath = NULL;
4747 : 1040 : rc = asprintf (&tmppath, "%s/debuginfod-classify.XXXXXX", tmpdir.c_str());
4748 [ - + ]: 1040 : if (rc < 0)
4749 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
4750 : 1040 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
4751 [ + - ]: 1040 : int fd = mkstemp (tmppath);
4752 [ - + ]: 1040 : if (fd < 0)
4753 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
4754 : 1040 : unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
4755 : 1040 : defer_dtor<int,int> minifd_closer (fd, close);
4756 : :
4757 [ + - ]: 1040 : rc = archive_read_data_into_fd (a, fd);
4758 [ - + ]: 1040 : if (rc != ARCHIVE_OK) {
4759 [ # # ]: 0 : close (fd);
4760 [ # # # # ]: 0 : throw archive_exception(a, rps, "cannot extract file");
4761 : : }
4762 : :
4763 : : // finally ... time to run elf_classify on this bad boy and update the database
4764 : 1040 : bool executable_p = false, debuginfo_p = false;
4765 [ + - ]: 1040 : string buildid;
4766 [ + - ]: 1040 : set<string> sourcefiles;
4767 [ + - ]: 1040 : elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
4768 : : // NB: might throw
4769 : :
4770 [ + + ]: 1040 : if (buildid != "") // intern buildid
4771 : : {
4772 : 666 : ps_upsert_buildids
4773 [ + - ]: 666 : .reset()
4774 [ + - ]: 666 : .bind(1, buildid)
4775 [ + - ]: 666 : .step_ok_done();
4776 : : }
4777 : :
4778 [ + - ]: 1040 : int64_t fileid = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, fn);
4779 : :
4780 [ + + ]: 1040 : if (sourcefiles.size() > 0) // sref records needed
4781 : : {
4782 : : // NB: we intern each source file once. Once raw, as it
4783 : : // appears in the DWARF file list coming back from
4784 : : // elf_classify() - because it'll end up in the
4785 : : // _norm.artifactsrc column. We don't also put another
4786 : : // version with a '.' at the front, even though that's
4787 : : // how rpm/cpio packs names, because we hide that from
4788 : : // the database for storage efficiency.
4789 : :
4790 [ + + ]: 802 : for (auto&& s : sourcefiles)
4791 : : {
4792 [ + + ]: 498 : if (s == "")
4793 : : {
4794 : 20 : fts_sref_complete_p = false;
4795 : 20 : continue;
4796 : : }
4797 : :
4798 : : // PR25548: store canonicalized source path
4799 : 478 : const string& dwarfsrc = s;
4800 [ + - ]: 478 : string dwarfsrc_canon = canon_pathname (dwarfsrc);
4801 [ + + ]: 478 : if (dwarfsrc_canon != dwarfsrc)
4802 : : {
4803 [ + - ]: 28 : if (verbose > 3)
4804 [ + - + - : 56 : obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+ - + - +
- ]
4805 : : }
4806 : :
4807 [ + - ]: 478 : int64_t srcfileid = register_file_name(ps_upsert_fileparts, ps_upsert_file, ps_lookup_file,
4808 : : dwarfsrc_canon);
4809 : :
4810 : 478 : ps_upsert_sref
4811 [ + - ]: 478 : .reset()
4812 [ + - ]: 478 : .bind(1, buildid)
4813 [ + - ]: 478 : .bind(2, srcfileid)
4814 [ + - ]: 478 : .step_ok_done();
4815 : :
4816 : 478 : fts_sref ++;
4817 : 478 : }
4818 : : }
4819 : :
4820 [ + + ]: 1040 : if (executable_p)
4821 : 302 : fts_executable ++;
4822 [ + + ]: 1040 : if (debuginfo_p)
4823 : 366 : fts_debuginfo ++;
4824 : :
4825 [ + + + + ]: 1040 : if (executable_p || debuginfo_p)
4826 : : {
4827 : 666 : ps_upsert_de
4828 [ + - ]: 666 : .reset()
4829 [ + - ]: 666 : .bind(1, buildid)
4830 [ + + + - ]: 966 : .bind(2, debuginfo_p ? 1 : 0)
4831 [ + + + - ]: 1030 : .bind(3, executable_p ? 1 : 0)
4832 [ + - ]: 666 : .bind(4, archiveid)
4833 [ + - ]: 666 : .bind(5, mtime)
4834 [ + - ]: 666 : .bind(6, fileid)
4835 [ + - ]: 666 : .step_ok_done();
4836 [ + + ]: 666 : if (seekable)
4837 : 336 : ps_upsert_seekable
4838 [ + - ]: 336 : .reset()
4839 [ + - ]: 336 : .bind(1, archiveid)
4840 [ + - ]: 336 : .bind(2, fileid)
4841 [ + - ]: 336 : .bind(3, seekable_size)
4842 [ + - ]: 336 : .bind(4, seekable_offset)
4843 [ + - ]: 336 : .bind(5, seekable_mtime)
4844 [ + - ]: 336 : .step_ok_done();
4845 : : }
4846 : : else // potential source - sdef record
4847 : : {
4848 : 374 : fts_sdef ++;
4849 : 374 : ps_upsert_sdef
4850 [ + - ]: 374 : .reset()
4851 [ + - ]: 374 : .bind(1, archiveid)
4852 [ + - ]: 374 : .bind(2, mtime)
4853 [ + - ]: 374 : .bind(3, fileid)
4854 [ + - ]: 374 : .step_ok_done();
4855 : : }
4856 : :
4857 [ + - + + : 1040 : if ((verbose > 2) && (executable_p || debuginfo_p))
+ + ]
4858 : : {
4859 [ + - ]: 666 : obatched ob(clog);
4860 [ + - + - ]: 666 : auto& o = ob << "recorded buildid=" << buildid << " rpm=" << rps << " file=" << fn
4861 [ + - + - : 666 : << " mtime=" << mtime << " atype="
+ - + - +
- + - ]
4862 : : << (executable_p ? "E" : "")
4863 : : << (debuginfo_p ? "D" : "")
4864 [ + - + + : 1330 : << " sourcefiles=" << sourcefiles.size();
+ - + + +
- + - +
- ]
4865 [ + + ]: 666 : if (seekable)
4866 [ + - + - ]: 336 : o << " seekable size=" << seekable_size
4867 [ + - + - ]: 336 : << " offset=" << seekable_offset
4868 [ + - + - ]: 336 : << " mtime=" << seekable_mtime;
4869 [ + - ]: 666 : o << endl;
4870 : 666 : }
4871 : :
4872 : 1040 : }
4873 [ - - ]: 0 : catch (const reportable_exception& e)
4874 : : {
4875 [ - - ]: 0 : e.report(clog);
4876 : 0 : any_exceptions = true;
4877 : : // NB: but we allow the libarchive iteration to continue, in
4878 : : // case we can still gather some useful information. That
4879 : : // would allow some webapi queries to work, until later when
4880 : : // this archive is rescanned. (Its vitals won't go into the
4881 : : // _file_mtime_scanned table until after a successful scan.)
4882 : 0 : }
4883 : : }
4884 : :
4885 [ - + ]: 396 : if (any_exceptions)
4886 [ # # ]: 0 : throw reportable_exception("exceptions encountered during archive scan");
4887 : 396 : }
4888 : :
4889 : :
4890 : :
4891 : : // scan for archive files such as .rpm
4892 : : static void
4893 : 762 : scan_archive_file (const string& rps, const stat_t& st,
4894 : : sqlite_ps& ps_upsert_buildids,
4895 : : sqlite_ps& ps_upsert_fileparts,
4896 : : sqlite_ps& ps_upsert_file,
4897 : : sqlite_ps& ps_lookup_file,
4898 : : sqlite_ps& ps_upsert_de,
4899 : : sqlite_ps& ps_upsert_sref,
4900 : : sqlite_ps& ps_upsert_sdef,
4901 : : sqlite_ps& ps_upsert_seekable,
4902 : : sqlite_ps& ps_query,
4903 : : sqlite_ps& ps_scan_done,
4904 : : unsigned& fts_cached,
4905 : : unsigned& fts_executable,
4906 : : unsigned& fts_debuginfo,
4907 : : unsigned& fts_sref,
4908 : : unsigned& fts_sdef)
4909 : : {
4910 : : // intern the archive file name
4911 : 762 : int64_t archiveid = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, rps);
4912 : :
4913 : : /* See if we know of it already. */
4914 : 762 : int rc = ps_query
4915 : 762 : .reset()
4916 : 762 : .bind(1, archiveid)
4917 : 762 : .bind(2, st.st_mtime)
4918 : 762 : .step();
4919 : 762 : ps_query.reset();
4920 [ + + ]: 762 : if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
4921 : : // no need to recheck a file/version we already know
4922 : : // specifically, no need to parse this archive again, since we already have
4923 : : // it as a D or E or S record,
4924 : : // (so is stored with buildid=NULL)
4925 : : {
4926 : 366 : fts_cached ++;
4927 : 366 : return;
4928 : : }
4929 : :
4930 : : // extract the archive contents
4931 : 396 : unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0;
4932 : 396 : bool my_fts_sref_complete_p = true;
4933 : 396 : bool any_exceptions = false;
4934 : 396 : try
4935 : : {
4936 [ + - ]: 396 : string archive_extension;
4937 : 396 : archive_classify (rps, archive_extension, archiveid,
4938 : : ps_upsert_buildids, ps_upsert_fileparts, ps_upsert_file, ps_lookup_file,
4939 : : ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, ps_upsert_seekable, // dalt
4940 [ + - ]: 396 : st.st_mtime,
4941 : : my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
4942 : : my_fts_sref_complete_p);
4943 [ + - + - : 792 : add_metric ("scanned_bytes_total","source",archive_extension + " archive",
+ - ]
4944 [ + - ]: 396 : st.st_size);
4945 [ + - + - : 792 : inc_metric ("scanned_files_total","source",archive_extension + " archive");
+ - + - ]
4946 [ + - + - : 792 : add_metric("found_debuginfo_total","source",archive_extension + " archive",
+ - + - ]
4947 : : my_fts_debuginfo);
4948 [ + - + - : 792 : add_metric("found_executable_total","source",archive_extension + " archive",
+ - + - ]
4949 : : my_fts_executable);
4950 [ + - + - : 792 : add_metric("found_sourcerefs_total","source",archive_extension + " archive",
+ - + - ]
4951 : : my_fts_sref);
4952 : 396 : }
4953 [ - - ]: 0 : catch (const reportable_exception& e)
4954 : : {
4955 [ - - ]: 0 : e.report(clog);
4956 : 0 : any_exceptions = true;
4957 : 0 : }
4958 : :
4959 [ + - ]: 396 : if (verbose > 2)
4960 [ + - ]: 1188 : obatched(clog) << "scanned archive=" << rps
4961 [ + - + - ]: 396 : << " mtime=" << st.st_mtime
4962 [ + - ]: 396 : << " executables=" << my_fts_executable
4963 [ + - + - ]: 396 : << " debuginfos=" << my_fts_debuginfo
4964 [ + - + - ]: 396 : << " srefs=" << my_fts_sref
4965 [ + - + - ]: 396 : << " sdefs=" << my_fts_sdef
4966 [ + - + - : 396 : << " exceptions=" << any_exceptions
+ - + - ]
4967 : 396 : << endl;
4968 : :
4969 : 396 : fts_executable += my_fts_executable;
4970 : 396 : fts_debuginfo += my_fts_debuginfo;
4971 : 396 : fts_sref += my_fts_sref;
4972 : 396 : fts_sdef += my_fts_sdef;
4973 : :
4974 [ - + ]: 396 : if (any_exceptions)
4975 [ # # ]: 0 : throw reportable_exception("exceptions encountered during archive scan");
4976 : :
4977 [ + + ]: 396 : if (my_fts_sref_complete_p) // leave incomplete?
4978 : 394 : ps_scan_done
4979 : 394 : .reset()
4980 : 394 : .bind(1, archiveid)
4981 : 394 : .bind(2, st.st_mtime)
4982 : 394 : .bind(3, st.st_size)
4983 : 394 : .step_ok_done();
4984 : : }
4985 : :
4986 : :
4987 : :
4988 : : ////////////////////////////////////////////////////////////////////////
4989 : :
4990 : :
4991 : :
4992 : : // The thread that consumes file names off of the scanq. We hold
4993 : : // the persistent sqlite_ps's at this level and delegate file/archive
4994 : : // scanning to other functions.
4995 : : static void
4996 : 288 : scan ()
4997 : : {
4998 : : // all the prepared statements fit to use, the _f_ set:
4999 [ + - + - ]: 576 : sqlite_ps ps_f_upsert_buildids (db, "file-buildids-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
5000 [ + - + - : 576 : sqlite_ps ps_f_upsert_fileparts (db, "file-fileparts-intern", "insert or ignore into " BUILDIDS "_fileparts VALUES (NULL, ?);");
+ - ]
5001 [ + - ]: 288 : sqlite_ps ps_f_upsert_file (db, "file-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, \n"
5002 : : "(select id from " BUILDIDS "_fileparts where name = ?),\n"
5003 [ + - + - : 576 : "(select id from " BUILDIDS "_fileparts where name = ?));");
+ - ]
5004 [ + - ]: 288 : sqlite_ps ps_f_lookup_file (db, "file-file-lookup",
5005 : : "select f.id\n"
5006 : : " from " BUILDIDS "_files f, " BUILDIDS "_fileparts p1, " BUILDIDS "_fileparts p2 \n"
5007 [ + - + - : 576 : " where f.dirname = p1.id and f.basename = p2.id and p1.name = ? and p2.name = ?;\n");
+ - ]
5008 [ + - ]: 288 : sqlite_ps ps_f_upsert_de (db, "file-de-upsert",
5009 : : "insert or ignore into " BUILDIDS "_f_de "
5010 : : "(buildid, debuginfo_p, executable_p, file, mtime) "
5011 : : "values ((select id from " BUILDIDS "_buildids where hex = ?),"
5012 [ + - + - : 576 : " ?,?,?,?);");
+ - ]
5013 [ + - ]: 288 : sqlite_ps ps_f_upsert_s (db, "file-s-upsert",
5014 : : "insert or ignore into " BUILDIDS "_f_s "
5015 : : "(buildid, artifactsrc, file, mtime) "
5016 : : "values ((select id from " BUILDIDS "_buildids where hex = ?),"
5017 [ + - + - : 576 : " ?,?,?);");
+ - ]
5018 [ + - ]: 288 : sqlite_ps ps_f_query (db, "file-negativehit-find",
5019 : : "select 1 from " BUILDIDS "_file_mtime_scanned where sourcetype = 'F' "
5020 [ + - + - : 576 : "and file = ? and mtime = ?;");
+ - ]
5021 [ + - ]: 288 : sqlite_ps ps_f_scan_done (db, "file-scanned",
5022 : : "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
5023 [ + - + - : 576 : "values ('F', ?,?,?);");
+ - ]
5024 : :
5025 : : // and now for the _r_ set
5026 [ + - + - : 576 : sqlite_ps ps_r_upsert_buildids (db, "rpm-buildid-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
+ - ]
5027 [ + - + - : 576 : sqlite_ps ps_r_upsert_fileparts (db, "rpm-fileparts-intern", "insert or ignore into " BUILDIDS "_fileparts VALUES (NULL, ?);");
+ - ]
5028 [ + - ]: 288 : sqlite_ps ps_r_upsert_file (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, \n"
5029 : : "(select id from " BUILDIDS "_fileparts where name = ?),\n"
5030 [ + - + - : 576 : "(select id from " BUILDIDS "_fileparts where name = ?));");
+ - ]
5031 [ + - ]: 288 : sqlite_ps ps_r_lookup_file (db, "rpm-file-lookup",
5032 : : "select f.id\n"
5033 : : " from " BUILDIDS "_files f, " BUILDIDS "_fileparts p1, " BUILDIDS "_fileparts p2 \n"
5034 [ + - + - : 576 : " where f.dirname = p1.id and f.basename = p2.id and p1.name = ? and p2.name = ?;\n");
+ - ]
5035 [ + - ]: 288 : sqlite_ps ps_r_upsert_de (db, "rpm-de-insert",
5036 : : "insert or ignore into " BUILDIDS "_r_de (buildid, debuginfo_p, executable_p, file, mtime, content) values ("
5037 [ + - + - : 576 : "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, ?, ?, ?);");
+ - ]
5038 [ + - ]: 288 : sqlite_ps ps_r_upsert_sref (db, "rpm-sref-insert",
5039 : : "insert or ignore into " BUILDIDS "_r_sref (buildid, artifactsrc) values ("
5040 : : "(select id from " BUILDIDS "_buildids where hex = ?), "
5041 [ + - + - : 576 : "?);");
+ - ]
5042 [ + - ]: 288 : sqlite_ps ps_r_upsert_sdef (db, "rpm-sdef-insert",
5043 : : "insert or ignore into " BUILDIDS "_r_sdef (file, mtime, content) values ("
5044 [ + - + - : 576 : "?, ?, ?);");
+ - ]
5045 [ + - ]: 288 : sqlite_ps ps_r_upsert_seekable (db, "rpm-seekable-insert",
5046 : : "insert or ignore into " BUILDIDS "_r_seekable (file, content, type, size, offset, mtime) "
5047 [ + - + - : 576 : "values (?, ?, 'xz', ?, ?, ?);");
+ - ]
5048 [ + - ]: 288 : sqlite_ps ps_r_query (db, "rpm-negativehit-query",
5049 : : "select 1 from " BUILDIDS "_file_mtime_scanned where "
5050 [ + - + - : 576 : "sourcetype = 'R' and file = ? and mtime = ?;");
+ - ]
5051 [ + - ]: 288 : sqlite_ps ps_r_scan_done (db, "rpm-scanned",
5052 : : "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
5053 [ + - + - : 576 : "values ('R', ?, ?, ?);");
+ - ]
5054 : :
5055 : :
5056 : 288 : unsigned fts_cached = 0, fts_executable = 0, fts_debuginfo = 0, fts_sourcefiles = 0;
5057 : 288 : unsigned fts_sref = 0, fts_sdef = 0;
5058 : :
5059 [ + - + - : 576 : add_metric("thread_count", "role", "scan", 1);
+ - + - ]
5060 [ + - + - : 576 : add_metric("thread_busy", "role", "scan", 1);
+ - + - ]
5061 [ + + ]: 1890 : while (! interrupted)
5062 : : {
5063 [ + - ]: 1602 : scan_payload p;
5064 : :
5065 [ + - + - : 3204 : add_metric("thread_busy", "role", "scan", -1);
+ - + - ]
5066 : : // NB: threads may be blocked within either of these two waiting
5067 : : // states, if the work queue happens to run dry. That's OK.
5068 [ + - + - ]: 1602 : if (scan_barrier) scan_barrier->count();
5069 [ + - ]: 1602 : bool gotone = scanq.wait_front(p);
5070 [ + - + - : 3203 : add_metric("thread_busy", "role", "scan", 1);
+ - + - ]
5071 : :
5072 [ + + ]: 1602 : if (! gotone) continue; // go back to waiting
5073 : :
5074 : 1314 : try
5075 : : {
5076 : 1314 : bool scan_archive = false;
5077 [ + + ]: 2884 : for (auto&& arch : scan_archives)
5078 [ + + ]: 1570 : if (string_endswith(p.first, arch.first))
5079 : 762 : scan_archive = true;
5080 : :
5081 [ + + ]: 1314 : if (scan_archive)
5082 [ + - ]: 762 : scan_archive_file (p.first, p.second,
5083 : : ps_r_upsert_buildids,
5084 : : ps_r_upsert_fileparts,
5085 : : ps_r_upsert_file,
5086 : : ps_r_lookup_file,
5087 : : ps_r_upsert_de,
5088 : : ps_r_upsert_sref,
5089 : : ps_r_upsert_sdef,
5090 : : ps_r_upsert_seekable,
5091 : : ps_r_query,
5092 : : ps_r_scan_done,
5093 : : fts_cached,
5094 : : fts_executable,
5095 : : fts_debuginfo,
5096 : : fts_sref,
5097 : : fts_sdef);
5098 : :
5099 [ + + ]: 1314 : if (scan_files) // NB: maybe "else if" ?
5100 [ + - ]: 1106 : scan_source_file (p.first, p.second,
5101 : : ps_f_upsert_buildids,
5102 : : ps_f_upsert_fileparts,
5103 : : ps_f_upsert_file,
5104 : : ps_f_lookup_file,
5105 : : ps_f_upsert_de,
5106 : : ps_f_upsert_s,
5107 : : ps_f_query,
5108 : : ps_f_scan_done,
5109 : : fts_cached, fts_executable, fts_debuginfo, fts_sourcefiles);
5110 : : }
5111 [ - - ]: 0 : catch (const reportable_exception& e)
5112 : : {
5113 [ - - ]: 0 : e.report(cerr);
5114 : 0 : }
5115 : :
5116 [ + - ]: 1314 : scanq.done_front(); // let idlers run
5117 : :
5118 : 1314 : if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef)
5119 : : {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed
5120 [ + - + - ]: 1314 : (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
5121 [ + - + - ]: 1314 : (void) statfs_free_enough_p(tmpdir, "tmpdir"); // this too, in case of fdcache/tmpfile usage
5122 : :
5123 : : // finished a scanning step -- not a "loop", because we just
5124 : : // consume the traversal loop's work, whenever
5125 [ + - + - : 2628 : inc_metric("thread_work_total","role","scan");
+ - + - ]
5126 : 1602 : }
5127 : :
5128 [ + - + - : 576 : add_metric("thread_busy", "role", "scan", -1);
+ - + - ]
5129 : 288 : }
5130 : :
5131 : :
5132 : : // Use this function as the thread entry point, so it can catch our
5133 : : // fleet of exceptions (incl. the sqlite_ps ctors) and report.
5134 : : static void*
5135 : 288 : thread_main_scanner (void* arg)
5136 : : {
5137 : 288 : (void) arg;
5138 [ + + ]: 864 : while (! interrupted)
5139 : 288 : try
5140 : : {
5141 [ + - ]: 288 : scan();
5142 : : }
5143 [ - - ]: 0 : catch (const reportable_exception& e)
5144 : : {
5145 [ - - ]: 0 : e.report(cerr);
5146 : 0 : }
5147 : 288 : return 0;
5148 : : }
5149 : :
5150 : :
5151 : :
5152 : : // The thread that traverses all the source_paths and enqueues all the
5153 : : // matching files into the file/archive scan queue.
5154 : : static void
5155 : 126 : scan_source_paths()
5156 : : {
5157 : : // NB: fedora 31 glibc/fts(3) crashes inside fts_read() on empty
5158 : : // path list.
5159 [ + + ]: 126 : if (source_paths.empty())
5160 : 2 : return;
5161 : :
5162 : : // Turn the source_paths into an fts(3)-compatible char**. Since
5163 : : // source_paths[] does not change after argv processing, the
5164 : : // c_str()'s are safe to keep around awile.
5165 : 124 : vector<const char *> sps;
5166 [ + + ]: 332 : for (auto&& sp: source_paths)
5167 [ + - ]: 208 : sps.push_back(sp.c_str());
5168 [ + - - - ]: 124 : sps.push_back(NULL);
5169 : :
5170 [ + + + - ]: 234 : FTS *fts = fts_open ((char * const *)sps.data(),
5171 : : (traverse_logical ? FTS_LOGICAL : FTS_PHYSICAL|FTS_XDEV)
5172 : : | FTS_NOCHDIR /* multithreaded */,
5173 : : NULL);
5174 [ - + ]: 124 : if (fts == NULL)
5175 [ # # # # ]: 0 : throw libc_exception(errno, "cannot fts_open");
5176 : 124 : defer_dtor<FTS*,int> fts_cleanup (fts, fts_close);
5177 : :
5178 : 124 : struct timespec ts_start, ts_end;
5179 : 124 : clock_gettime (CLOCK_MONOTONIC, &ts_start);
5180 : 124 : unsigned fts_scanned = 0, fts_regex = 0;
5181 : :
5182 : 124 : FTSENT *f;
5183 [ + - + + ]: 2644 : while ((f = fts_read (fts)) != NULL)
5184 : : {
5185 [ + - ]: 2396 : if (interrupted) break;
5186 : :
5187 [ - + ]: 2396 : if (sigusr2 != forced_groom_count) // stop early if groom triggered
5188 : : {
5189 [ # # ]: 0 : scanq.clear(); // clear previously issued work for scanner threads
5190 : : break;
5191 : : }
5192 : :
5193 : 2396 : fts_scanned ++;
5194 : :
5195 [ - + - - : 2396 : if (max_depth >= 0 && (f->fts_info == FTS_D || f->fts_info == FTS_DP) &&
- - ]
5196 [ # # ]: 0 : f->fts_level > max_depth)
5197 : : {
5198 : 0 : fts_set(fts, f, FTS_SKIP);
5199 [ # # ]: 0 : if (verbose > 2)
5200 [ # # ]: 0 : obatched(clog) << "fts skip " << f->fts_path
5201 : 0 : << (f->fts_info == FTS_D ? " pre-traversal" :
5202 [ # # # # : 0 : " post-traversal") << endl;
# # # # ]
5203 : 0 : continue;
5204 : 2396 : };
5205 : :
5206 [ + - ]: 2396 : if (verbose > 2)
5207 [ + - + - : 4792 : obatched(clog) << "fts traversing " << f->fts_path << endl;
+ - ]
5208 : :
5209 [ + + + + : 2396 : switch (f->fts_info)
+ ]
5210 : : {
5211 : 1424 : case FTS_F:
5212 : 1424 : {
5213 : : /* Found a file. Convert it to an absolute path, so
5214 : : the buildid database does not have relative path
5215 : : names that are unresolvable from a subsequent run
5216 : : in a different cwd. */
5217 [ + - ]: 1424 : char *rp = realpath(f->fts_path, NULL);
5218 [ - + ]: 1424 : if (rp == NULL)
5219 : 0 : continue; // ignore dangling symlink or such
5220 [ + - ]: 1424 : string rps = string(rp);
5221 : 1424 : free (rp);
5222 : :
5223 [ + - ]: 1424 : bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
5224 [ + - ]: 1424 : bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
5225 [ + + ]: 1424 : if (!ri || rx)
5226 : : {
5227 [ + - ]: 110 : if (verbose > 3)
5228 [ + - ]: 220 : obatched(clog) << "fts skipped by regex "
5229 [ + + + - : 134 : << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
+ + + - +
- ]
5230 : 110 : fts_regex ++;
5231 [ + + ]: 110 : if (!ri)
5232 [ + - + - : 24 : inc_metric("traversed_total","type","file-skipped-I");
+ - + - ]
5233 [ + + ]: 110 : if (rx)
5234 [ + - + - : 196 : inc_metric("traversed_total","type","file-skipped-X");
+ - + - ]
5235 : : }
5236 : : else
5237 : : {
5238 [ + - + - ]: 1314 : scanq.push_back (make_pair(rps, *f->fts_statp));
5239 [ + - + - : 2628 : inc_metric("traversed_total","type","file");
+ - + - ]
5240 : : }
5241 : 0 : }
5242 : 1424 : break;
5243 : :
5244 : 4 : case FTS_ERR:
5245 : 4 : case FTS_NS:
5246 : : // report on some types of errors because they may reflect fixable misconfiguration
5247 : 4 : {
5248 [ + - + - : 8 : auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path));
+ - + - ]
5249 [ + - ]: 4 : x.report(cerr);
5250 : 0 : }
5251 [ + - + - : 8 : inc_metric("traversed_total","type","error");
+ - + - ]
5252 : 4 : break;
5253 : :
5254 : 32 : case FTS_SL: // ignore, but count because debuginfod -L would traverse these
5255 [ + - + - : 64 : inc_metric("traversed_total","type","symlink");
+ - + - ]
5256 : 32 : break;
5257 : :
5258 : 468 : case FTS_D: // ignore
5259 [ + - + - : 936 : inc_metric("traversed_total","type","directory");
+ - + - ]
5260 : 468 : break;
5261 : :
5262 : 468 : default: // ignore
5263 [ + - + - : 936 : inc_metric("traversed_total","type","other");
+ - + - ]
5264 : 468 : break;
5265 : : }
5266 : : }
5267 : 124 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
5268 : 124 : double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
5269 : :
5270 [ + - + - : 372 : obatched(clog) << "fts traversed source paths in " << deltas << "s, scanned=" << fts_scanned
+ - + - ]
5271 [ + - + - : 124 : << ", regex-skipped=" << fts_regex << endl;
+ - ]
5272 [ + - ]: 248 : }
5273 : :
5274 : :
5275 : : static void*
5276 : 72 : thread_main_fts_source_paths (void* arg)
5277 : : {
5278 : 72 : (void) arg; // ignore; we operate on global data
5279 : :
5280 [ + - + - : 144 : set_metric("thread_tid", "role","traverse", tid());
+ - ]
5281 [ + - + - : 144 : add_metric("thread_count", "role", "traverse", 1);
+ - ]
5282 : :
5283 : 72 : time_t last_rescan = 0;
5284 : :
5285 [ + - ]: 290 : while (! interrupted)
5286 : : {
5287 : 290 : sleep (1);
5288 : 290 : scanq.wait_idle(); // don't start a new traversal while scanners haven't finished the job
5289 : 290 : scanq.done_idle(); // release the hounds
5290 [ + + ]: 290 : if (interrupted) break;
5291 : :
5292 : 218 : time_t now = time(NULL);
5293 : 218 : bool rescan_now = false;
5294 [ + + ]: 218 : if (last_rescan == 0) // at least one initial rescan is documented even for -t0
5295 : 70 : rescan_now = true;
5296 [ + + + + ]: 218 : if (rescan_s > 0 && (long)now > (long)(last_rescan + rescan_s))
5297 : 218 : rescan_now = true;
5298 [ + + ]: 218 : if (sigusr1 != forced_rescan_count)
5299 : : {
5300 : 58 : forced_rescan_count = sigusr1;
5301 : 58 : rescan_now = true;
5302 : : }
5303 [ + + ]: 218 : if (rescan_now)
5304 : : {
5305 [ + - + - : 252 : set_metric("thread_busy", "role","traverse", 1);
+ - ]
5306 : 126 : try
5307 : : {
5308 [ + - ]: 126 : scan_source_paths();
5309 : : }
5310 [ - - ]: 0 : catch (const reportable_exception& e)
5311 : : {
5312 [ - - ]: 0 : e.report(cerr);
5313 : 0 : }
5314 : 126 : last_rescan = time(NULL); // NB: now was before scanning
5315 : : // finished a traversal loop
5316 [ + - + - : 252 : inc_metric("thread_work_total", "role","traverse");
+ - ]
5317 [ + - + - : 252 : set_metric("thread_busy", "role","traverse", 0);
+ - ]
5318 : : }
5319 : : }
5320 : :
5321 : 72 : return 0;
5322 : : }
5323 : :
5324 : :
5325 : :
5326 : : ////////////////////////////////////////////////////////////////////////
5327 : :
5328 : : static void
5329 : 78 : database_stats_report()
5330 : : {
5331 : 78 : sqlite_ps ps_query (db, "database-overview",
5332 [ + - + - ]: 156 : "select label,quantity from " BUILDIDS "_stats");
5333 : :
5334 [ + - + - ]: 156 : obatched(clog) << "database record counts:" << endl;
5335 : 1794 : while (1)
5336 : : {
5337 [ + - ]: 936 : if (interrupted) break;
5338 [ + - ]: 936 : if (sigusr1 != forced_rescan_count) // stop early if scan triggered
5339 : : break;
5340 : :
5341 [ + - ]: 936 : int rc = ps_query.step();
5342 [ + + ]: 936 : if (rc == SQLITE_DONE) break;
5343 [ - + ]: 858 : if (rc != SQLITE_ROW)
5344 [ # # # # ]: 0 : throw sqlite_exception(rc, "step");
5345 : :
5346 [ + - ]: 858 : obatched(clog)
5347 [ + - - + : 858 : << ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL")
+ - ]
5348 : : << " "
5349 [ + - + - : 1716 : << (sqlite3_column_text(ps_query, 1) ?: (const unsigned char*) "NULL")
- + + - ]
5350 : 858 : << endl;
5351 : :
5352 [ + - + - : 1716 : set_metric("groom", "statistic",
- + + - +
- + - +
- ]
5353 [ + - ]: 858 : ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL"),
5354 : : (sqlite3_column_double(ps_query, 1)));
5355 : 858 : }
5356 : 78 : }
5357 : :
5358 : :
5359 : : // Do a round of database grooming that might take many minutes to run.
5360 : 78 : void groom()
5361 : : {
5362 [ + - ]: 156 : obatched(clog) << "grooming database" << endl;
5363 : :
5364 : 78 : struct timespec ts_start, ts_end;
5365 : 78 : clock_gettime (CLOCK_MONOTONIC, &ts_start);
5366 : :
5367 : : // scan for files that have disappeared
5368 : 78 : sqlite_ps files (db, "check old files",
5369 : : "select distinct s.mtime, s.file, f.name from "
5370 : : BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files_v f "
5371 [ + - + - ]: 156 : "where f.id = s.file");
5372 : : // NB: Because _ftime_mtime_scanned can contain both F and
5373 : : // R records for the same file, this query would return duplicates if the
5374 : : // DISTINCT qualifier were not there.
5375 [ + - ]: 78 : files.reset();
5376 : :
5377 : : // DECISION TIME - we enumerate stale fileids/mtimes
5378 [ + - ]: 78 : deque<pair<int64_t,int64_t> > stale_fileid_mtime;
5379 : :
5380 : 78 : time_t time_start = time(NULL);
5381 : 342 : while(1)
5382 : : {
5383 : : // PR28514: limit grooming iteration to O(rescan time), to avoid
5384 : : // slow filesystem tests over many files locking out rescans for
5385 : : // too long.
5386 [ + + - + ]: 210 : if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
5387 : : {
5388 [ # # # # : 0 : inc_metric("groomed_total", "decision", "aborted");
# # # # ]
5389 : 0 : break;
5390 : : }
5391 : :
5392 [ + - ]: 210 : if (interrupted) break;
5393 : :
5394 [ + - ]: 210 : int rc = files.step();
5395 [ + + ]: 210 : if (rc != SQLITE_ROW)
5396 : : break;
5397 : :
5398 [ + - ]: 132 : int64_t mtime = sqlite3_column_int64 (files, 0);
5399 [ + - ]: 132 : int64_t fileid = sqlite3_column_int64 (files, 1);
5400 [ + - - + ]: 132 : const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: "");
5401 : 132 : struct stat s;
5402 : 132 : bool regex_file_drop = 0;
5403 : :
5404 [ + + ]: 132 : if (regex_groom)
5405 : : {
5406 [ + - ]: 16 : bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0);
5407 [ + - ]: 16 : bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0);
5408 : 16 : regex_file_drop = !reg_include || reg_exclude; // match logic of scan_source_paths
5409 : : }
5410 : :
5411 : 132 : rc = stat(filename, &s);
5412 [ + + - + ]: 132 : if ( regex_file_drop || rc < 0 || (mtime != (int64_t) s.st_mtime) )
5413 : : {
5414 [ + - ]: 24 : if (verbose > 2)
5415 [ + - + - : 48 : obatched(clog) << "groom: stale file=" << filename << " mtime=" << mtime << endl;
+ - + - +
- ]
5416 [ + - ]: 24 : stale_fileid_mtime.push_back(make_pair(fileid,mtime));
5417 [ + - + - : 48 : inc_metric("groomed_total", "decision", "stale");
+ - + - ]
5418 [ + - + - : 48 : set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+ - + - ]
5419 : : }
5420 : : else
5421 [ + - + - : 216 : inc_metric("groomed_total", "decision", "fresh");
+ - + - ]
5422 : :
5423 [ + - ]: 132 : if (sigusr1 != forced_rescan_count) // stop early if scan triggered
5424 : : break;
5425 : 132 : }
5426 [ + - ]: 78 : files.reset();
5427 : :
5428 : : // ACTION TIME
5429 : :
5430 : : // Now that we know which file/mtime tuples are stale, actually do
5431 : : // the deletion from the database. Doing this during the SELECT
5432 : : // iteration above results in undefined behaviour in sqlite, as per
5433 : : // https://www.sqlite.org/isolation.html
5434 : :
5435 : : // We could shuffle stale_fileid_mtime[] here. It'd let aborted
5436 : : // sequences of nuke operations resume at random locations, instead
5437 : : // of just starting over. But it doesn't matter much either way,
5438 : : // as long as we make progress.
5439 : :
5440 [ + - + - : 156 : sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
+ - ]
5441 [ + - + - : 156 : sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
+ - ]
5442 [ + - ]: 78 : sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
5443 [ + - + - : 156 : "where file = ? and mtime = ?");
+ - ]
5444 : :
5445 [ + + ]: 102 : while (! stale_fileid_mtime.empty())
5446 : : {
5447 : 24 : auto stale = stale_fileid_mtime.front();
5448 : 24 : stale_fileid_mtime.pop_front();
5449 [ + - + - : 48 : set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+ - + - ]
5450 : :
5451 : : // PR28514: limit grooming iteration to O(rescan time), to avoid
5452 : : // slow nuke_* queries over many files locking out rescans for too
5453 : : // long. We iterate over the files in random() sequence to avoid
5454 : : // partial checks going over the same set.
5455 [ - + - - ]: 24 : if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
5456 : : {
5457 [ # # # # : 0 : inc_metric("groomed_total", "action", "aborted");
# # # # ]
5458 : 0 : break;
5459 : : }
5460 : :
5461 [ + - ]: 24 : if (interrupted) break;
5462 : :
5463 : 24 : int64_t fileid = stale.first;
5464 : 24 : int64_t mtime = stale.second;
5465 [ + - + - : 24 : files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ - + - ]
5466 [ + - + - : 24 : files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ - + - ]
5467 [ + - + - : 24 : files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ - + - ]
5468 [ + - + - : 48 : inc_metric("groomed_total", "action", "cleaned");
+ - + - ]
5469 : :
5470 [ + - ]: 24 : if (sigusr1 != forced_rescan_count) // stop early if scan triggered
5471 : : break;
5472 : : }
5473 : 78 : stale_fileid_mtime.clear(); // no need for this any longer
5474 [ + - + - : 156 : set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+ - + - ]
5475 : :
5476 : : // delete buildids with no references in _r_de or _f_de tables;
5477 : : // cascades to _r_sref & _f_s records
5478 [ + - ]: 78 : sqlite_ps buildids_del (db, "nuke orphan buildids",
5479 : : "delete from " BUILDIDS "_buildids "
5480 : : "where not exists (select 1 from " BUILDIDS "_f_de d where " BUILDIDS "_buildids.id = d.buildid) "
5481 [ + - + - : 156 : "and not exists (select 1 from " BUILDIDS "_r_de d where " BUILDIDS "_buildids.id = d.buildid)");
+ - ]
5482 [ + - + - ]: 78 : buildids_del.reset().step_ok_done();
5483 : :
5484 [ - + ]: 78 : if (interrupted) return;
5485 : :
5486 : : // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
5487 [ + - + - : 156 : { sqlite_ps g (db, "incremental vacuum", "pragma incremental_vacuum"); g.reset().step_ok_done(); }
+ - + - +
- ]
5488 : : // https://www.sqlite.org/lang_analyze.html#approx
5489 [ + - + - : 156 : { sqlite_ps g (db, "analyze setup", "pragma analysis_limit = 1000;\n"); g.reset().step_ok_done(); }
+ - + - +
- ]
5490 [ + - + - : 156 : { sqlite_ps g (db, "analyze", "analyze"); g.reset().step_ok_done(); }
+ - + - +
- ]
5491 [ + - + - : 156 : { sqlite_ps g (db, "analyze reload", "analyze sqlite_schema"); g.reset().step_ok_done(); }
+ - + - +
- ]
5492 [ + - + - : 156 : { sqlite_ps g (db, "optimize", "pragma optimize"); g.reset().step_ok_done(); }
+ - + - +
- ]
5493 [ + - + - : 156 : { sqlite_ps g (db, "wal checkpoint", "pragma wal_checkpoint=truncate"); g.reset().step_ok_done(); }
+ - + - +
- ]
5494 : :
5495 [ + - ]: 78 : database_stats_report();
5496 : :
5497 [ + - + - ]: 78 : (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
5498 : :
5499 [ + - ]: 78 : sqlite3_db_release_memory(db); // shrink the process if possible
5500 [ + - ]: 78 : sqlite3_db_release_memory(dbq); // ... for both connections
5501 [ + - ]: 78 : debuginfod_pool_groom(); // and release any debuginfod_client objects we've been holding onto
5502 : : #if HAVE_MALLOC_TRIM
5503 : 78 : malloc_trim(0); // PR31103: release memory allocated for temporary purposes
5504 : : #endif
5505 : :
5506 : : #if 0 /* PR31265: don't jettison cache unnecessarily */
5507 : : fdcache.limit(0); // release the fdcache contents
5508 : : fdcache.limit(fdcache_mbs); // restore status quo parameters
5509 : : #endif
5510 : :
5511 : 78 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
5512 : 78 : double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
5513 : :
5514 [ + - + - : 156 : obatched(clog) << "groomed database in " << deltas << "s" << endl;
+ - + - ]
5515 : 78 : }
5516 : :
5517 : :
5518 : : static void*
5519 : 82 : thread_main_groom (void* /*arg*/)
5520 : : {
5521 [ + - + - : 164 : set_metric("thread_tid", "role", "groom", tid());
+ - ]
5522 [ + - + - : 164 : add_metric("thread_count", "role", "groom", 1);
+ - ]
5523 : :
5524 : 82 : time_t last_groom = 0;
5525 : :
5526 : 522 : while (1)
5527 : : {
5528 : 302 : sleep (1);
5529 : 302 : scanq.wait_idle(); // PR25394: block scanners during grooming!
5530 [ + + ]: 302 : if (interrupted) break;
5531 : :
5532 : 220 : time_t now = time(NULL);
5533 : 220 : bool groom_now = false;
5534 [ + + ]: 220 : if (last_groom == 0) // at least one initial groom is documented even for -g0
5535 : 72 : groom_now = true;
5536 [ + + + + ]: 220 : if (groom_s > 0 && (long)now > (long)(last_groom + groom_s))
5537 : 220 : groom_now = true;
5538 [ + + ]: 220 : if (sigusr2 != forced_groom_count)
5539 : : {
5540 : 6 : forced_groom_count = sigusr2;
5541 : 6 : groom_now = true;
5542 : : }
5543 [ + + ]: 220 : if (groom_now)
5544 : : {
5545 [ + - + - : 156 : set_metric("thread_busy", "role", "groom", 1);
+ - ]
5546 : 78 : try
5547 : : {
5548 [ + - ]: 78 : groom ();
5549 : : }
5550 [ - - ]: 0 : catch (const sqlite_exception& e)
5551 : : {
5552 [ - - - - : 0 : obatched(cerr) << e.message << endl;
- - ]
5553 : 0 : }
5554 : 78 : last_groom = time(NULL); // NB: now was before grooming
5555 : : // finished a grooming loop
5556 [ + - + - : 156 : inc_metric("thread_work_total", "role", "groom");
+ - ]
5557 [ + - + - : 156 : set_metric("thread_busy", "role", "groom", 0);
+ - ]
5558 : : }
5559 : :
5560 : 220 : scanq.done_idle();
5561 : 220 : }
5562 : :
5563 : 82 : return 0;
5564 : : }
5565 : :
5566 : :
5567 : : ////////////////////////////////////////////////////////////////////////
5568 : :
5569 : :
5570 : : static void
5571 : 84 : signal_handler (int /* sig */)
5572 : : {
5573 : 84 : interrupted ++;
5574 : :
5575 [ + + ]: 84 : if (db)
5576 : 82 : sqlite3_interrupt (db);
5577 [ + - ]: 84 : if (dbq)
5578 : 84 : sqlite3_interrupt (dbq);
5579 : :
5580 : : // NB: don't do anything else in here
5581 : 84 : }
5582 : :
5583 : : static void
5584 : 58 : sigusr1_handler (int /* sig */)
5585 : : {
5586 : 58 : sigusr1 ++;
5587 : : // NB: don't do anything else in here
5588 : 58 : }
5589 : :
5590 : : static void
5591 : 6 : sigusr2_handler (int /* sig */)
5592 : : {
5593 : 6 : sigusr2 ++;
5594 : : // NB: don't do anything else in here
5595 : 6 : }
5596 : :
5597 : :
5598 : : static void // error logging callback from libmicrohttpd internals
5599 : 0 : error_cb (void *arg, const char *fmt, va_list ap)
5600 : : {
5601 : 0 : (void) arg;
5602 [ # # # # : 0 : inc_metric("error_count","libmicrohttpd",fmt);
# # ]
5603 : 0 : char errmsg[512];
5604 : 0 : (void) vsnprintf (errmsg, sizeof(errmsg), fmt, ap); // ok if slightly truncated
5605 [ # # ]: 0 : obatched(cerr) << "libmicrohttpd error: " << errmsg; // MHD_DLOG calls already include \n
5606 : 0 : }
5607 : :
5608 : :
5609 : : // A user-defined sqlite function, to score the sharedness of the
5610 : : // prefix of two strings. This is used to compare candidate debuginfo
5611 : : // / source-rpm names, so that the closest match
5612 : : // (directory-topology-wise closest) is found. This is important in
5613 : : // case the same sref (source file name) is in many -debuginfo or
5614 : : // -debugsource RPMs, such as when multiple versions/releases of the
5615 : : // same package are in the database.
5616 : :
5617 : 1344 : static void sqlite3_sharedprefix_fn (sqlite3_context* c, int argc, sqlite3_value** argv)
5618 : : {
5619 [ - + ]: 1344 : if (argc != 2)
5620 : 0 : sqlite3_result_error(c, "expect 2 string arguments", -1);
5621 [ + - + + ]: 2688 : else if ((sqlite3_value_type(argv[0]) != SQLITE_TEXT) ||
5622 : 1344 : (sqlite3_value_type(argv[1]) != SQLITE_TEXT))
5623 : 1082 : sqlite3_result_null(c);
5624 : : else
5625 : : {
5626 : 262 : const unsigned char* a = sqlite3_value_text (argv[0]);
5627 : 262 : const unsigned char* b = sqlite3_value_text (argv[1]);
5628 : 262 : int i = 0;
5629 [ + + + - : 27722 : while (*a != '\0' && *b != '\0' && *a++ == *b++)
+ + ]
5630 : 27198 : i++;
5631 : 262 : sqlite3_result_int (c, i);
5632 : : }
5633 : 1344 : }
5634 : :
5635 : :
5636 : : static unsigned
5637 : 164 : default_concurrency() // guaranteed >= 1
5638 : : {
5639 : : // Prior to PR29975 & PR29976, we'd just use this:
5640 : 164 : unsigned sth = std::thread::hardware_concurrency();
5641 : : // ... but on many-CPU boxes, admins or distros may throttle
5642 : : // resources in such a way that debuginfod would mysteriously fail.
5643 : : // So we reduce the defaults:
5644 : :
5645 : 164 : unsigned aff = 0;
5646 : : #ifdef HAVE_SCHED_GETAFFINITY
5647 : 164 : {
5648 : 164 : int ret;
5649 : 164 : cpu_set_t mask;
5650 : 164 : CPU_ZERO(&mask);
5651 : 164 : ret = sched_getaffinity(0, sizeof(mask), &mask);
5652 [ + - ]: 164 : if (ret == 0)
5653 : 164 : aff = CPU_COUNT(&mask);
5654 : : }
5655 : : #endif
5656 : :
5657 : 164 : unsigned fn = 0;
5658 : : #ifdef HAVE_GETRLIMIT
5659 : 164 : {
5660 : 164 : struct rlimit rlim;
5661 : 164 : int rc = getrlimit(RLIMIT_NOFILE, &rlim);
5662 [ + - ]: 164 : if (rc == 0)
5663 [ - + ]: 164 : fn = max((rlim_t)1, (rlim.rlim_cur - 100) / 4);
5664 : : // at least 2 fds are used by each listener thread etc.
5665 : : // plus a bunch to account for shared libraries and such
5666 : : }
5667 : : #endif
5668 : :
5669 [ - + + - : 328 : unsigned d = min(max(sth, 1U),
- + ]
5670 [ + - ]: 164 : min(max(aff, 1U),
5671 [ - + ]: 164 : max(fn, 1U)));
5672 : 164 : return d;
5673 : : }
5674 : :
5675 : :
5676 : : // 30879: Something to help out in case of an uncaught exception.
5677 : 0 : void my_terminate_handler()
5678 : : {
5679 : : #if defined(__GLIBC__)
5680 : 0 : void *array[40];
5681 : 0 : int size = backtrace (array, 40);
5682 : 0 : backtrace_symbols_fd (array, size, STDERR_FILENO);
5683 : : #endif
5684 : : #if defined(__GLIBCXX__) || defined(__GLIBCPP__)
5685 : 0 : __gnu_cxx::__verbose_terminate_handler();
5686 : : #endif
5687 : 0 : abort();
5688 : : }
5689 : :
5690 : :
5691 : : int
5692 : 84 : main (int argc, char *argv[])
5693 : : {
5694 : 84 : (void) setlocale (LC_ALL, "");
5695 : 84 : (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
5696 : 84 : (void) textdomain (PACKAGE_TARNAME);
5697 : :
5698 : 84 : std::set_terminate(& my_terminate_handler);
5699 : :
5700 : : /* Tell the library which version we are expecting. */
5701 : 84 : elf_version (EV_CURRENT);
5702 : :
5703 [ + - ]: 168 : tmpdir = string(getenv("TMPDIR") ?: "/tmp");
5704 : :
5705 : : /* Set computed default values. */
5706 [ - + + - : 84 : db_path = string(getenv("HOME") ?: "/") + string("/.debuginfod.sqlite"); /* XDG? */
+ - ]
5707 : 84 : int rc = regcomp (& file_include_regex, ".*", REG_EXTENDED|REG_NOSUB); // match everything
5708 [ - + ]: 84 : if (rc != 0)
5709 : 0 : error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
5710 : 84 : rc = regcomp (& file_exclude_regex, "^$", REG_EXTENDED|REG_NOSUB); // match nothing
5711 [ - + ]: 84 : if (rc != 0)
5712 : 0 : error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
5713 : :
5714 : : // default parameters for fdcache are computed from system stats
5715 : 84 : struct statfs sfs;
5716 : 84 : rc = statfs(tmpdir.c_str(), &sfs);
5717 [ - + ]: 84 : if (rc < 0)
5718 : 0 : fdcache_mbs = 1024; // 1 gigabyte
5719 : : else
5720 : 84 : fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
5721 : 84 : fdcache_mintmp = 25; // emergency flush at 25% remaining (75% full)
5722 : 84 : fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
5723 : :
5724 : : /* Parse and process arguments. */
5725 : 84 : memset(&http_sockaddr, 0, sizeof(http_sockaddr));
5726 : 84 : http_sockaddr.sin6_family = AF_UNSPEC;
5727 : 84 : int remaining;
5728 : 84 : (void) argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &remaining, NULL);
5729 [ - + ]: 84 : if (remaining != argc)
5730 : 0 : error (EXIT_FAILURE, 0,
5731 : 0 : "unexpected argument: %s", argv[remaining]);
5732 : :
5733 [ + + + + : 84 : if (scan_archives.size()==0 && !scan_files && source_paths.size()>0)
- + ]
5734 [ # # ]: 0 : obatched(clog) << "warning: without -F -R -U -Z, ignoring PATHs" << endl;
5735 : :
5736 : 84 : fdcache.limit(fdcache_mbs);
5737 : :
5738 : 84 : (void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore
5739 : 84 : (void) signal (SIGINT, signal_handler); // ^C
5740 : 84 : (void) signal (SIGHUP, signal_handler); // EOF
5741 : 84 : (void) signal (SIGTERM, signal_handler); // systemd
5742 : 84 : (void) signal (SIGUSR1, sigusr1_handler); // end-user
5743 : 84 : (void) signal (SIGUSR2, sigusr2_handler); // end-user
5744 : :
5745 : : /* Get database ready. */
5746 [ + + ]: 84 : if (! passive_p)
5747 : : {
5748 : 82 : rc = sqlite3_open_v2 (db_path.c_str(), &db, (SQLITE_OPEN_READWRITE
5749 : : |SQLITE_OPEN_URI
5750 : : |SQLITE_OPEN_PRIVATECACHE
5751 : : |SQLITE_OPEN_CREATE
5752 : : |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
5753 : : NULL);
5754 [ - + ]: 82 : if (rc == SQLITE_CORRUPT)
5755 : : {
5756 : 0 : (void) unlink (db_path.c_str());
5757 : 0 : error (EXIT_FAILURE, 0,
5758 : : "cannot open %s, deleted database: %s", db_path.c_str(), sqlite3_errmsg(db));
5759 : : }
5760 [ - + ]: 82 : else if (rc)
5761 : : {
5762 : 0 : error (EXIT_FAILURE, 0,
5763 : : "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(db));
5764 : : }
5765 : : }
5766 : :
5767 : : // open the readonly query variant
5768 : : // NB: PRIVATECACHE allows web queries to operate in parallel with
5769 : : // much other grooming/scanning operation.
5770 : 84 : rc = sqlite3_open_v2 (db_path.c_str(), &dbq, (SQLITE_OPEN_READONLY
5771 : : |SQLITE_OPEN_URI
5772 : : |SQLITE_OPEN_PRIVATECACHE
5773 : : |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
5774 : : NULL);
5775 [ - + ]: 84 : if (rc)
5776 : : {
5777 : 0 : error (EXIT_FAILURE, 0,
5778 : : "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(dbq));
5779 : : }
5780 : :
5781 : :
5782 [ + - ]: 168 : obatched(clog) << "opened database " << db_path
5783 [ + + + - : 86 : << (db?" rw":"") << (dbq?" ro":"") << endl;
- + + - +
- ]
5784 [ + - + - ]: 168 : obatched(clog) << "sqlite version " << sqlite3_version << endl;
5785 [ + + + - : 250 : obatched(clog) << "service mode " << (passive_p ? "passive":"active") << endl;
+ - ]
5786 : :
5787 : : // add special string-prefix-similarity function used in rpm sref/sdef resolution
5788 : 84 : rc = sqlite3_create_function(dbq, "sharedprefix", 2, SQLITE_UTF8, NULL,
5789 : : & sqlite3_sharedprefix_fn, NULL, NULL);
5790 [ - + ]: 84 : if (rc != SQLITE_OK)
5791 : 0 : error (EXIT_FAILURE, 0,
5792 : : "cannot create sharedprefix function: %s", sqlite3_errmsg(dbq));
5793 : :
5794 [ + + ]: 84 : if (! passive_p)
5795 : : {
5796 [ + + ]: 82 : if (verbose > 3)
5797 [ + - + - ]: 92 : obatched(clog) << "ddl: " << DEBUGINFOD_SQLITE_DDL << endl;
5798 : 82 : rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_DDL, NULL, NULL, NULL);
5799 [ - + ]: 82 : if (rc != SQLITE_OK)
5800 : : {
5801 : 0 : error (EXIT_FAILURE, 0,
5802 : : "cannot run database schema ddl: %s", sqlite3_errmsg(db));
5803 : : }
5804 : : }
5805 : :
5806 [ + - + - : 168 : obatched(clog) << "libmicrohttpd version " << MHD_get_version() << endl;
+ - ]
5807 : :
5808 : : /* If '-C' wasn't given or was given with no arg, pick a reasonable default
5809 : : for the number of worker threads. */
5810 [ + + ]: 84 : if (connection_pool == 0)
5811 : 80 : connection_pool = default_concurrency();
5812 : :
5813 : : /* Note that MHD_USE_EPOLL and MHD_USE_THREAD_PER_CONNECTION don't
5814 : : work together. */
5815 : 84 : unsigned int use_epoll = 0;
5816 : : #if MHD_VERSION >= 0x00095100
5817 : 84 : use_epoll = MHD_USE_EPOLL;
5818 : : #endif
5819 : :
5820 : 84 : unsigned int mhd_flags = (
5821 : : #if MHD_VERSION >= 0x00095300
5822 : : MHD_USE_INTERNAL_POLLING_THREAD
5823 : : #else
5824 : : MHD_USE_SELECT_INTERNALLY
5825 : : #endif
5826 : : | MHD_USE_DUAL_STACK
5827 : : | use_epoll
5828 : : #if MHD_VERSION >= 0x00095200
5829 : : | MHD_USE_ITC
5830 : : #endif
5831 : : | MHD_USE_DEBUG); /* report errors to stderr */
5832 : :
5833 : 84 : MHD_Daemon *dsa = NULL,
5834 : 84 : *d4 = NULL,
5835 : 84 : *d46 = NULL;
5836 : :
5837 [ - + ]: 84 : if (http_sockaddr.sin6_family != AF_UNSPEC)
5838 : : {
5839 [ # # ]: 0 : if (http_sockaddr.sin6_family == AF_INET)
5840 : 0 : ((sockaddr_in*)&http_sockaddr)->sin_port = htons(http_port);
5841 [ # # ]: 0 : if (http_sockaddr.sin6_family == AF_INET6)
5842 : 0 : http_sockaddr.sin6_port = htons(http_port);
5843 : : // Start httpd server threads on socket addr:port.
5844 : 0 : dsa = MHD_start_daemon (mhd_flags & ~MHD_USE_DUAL_STACK, http_port,
5845 : : NULL, NULL, /* default accept policy */
5846 : : handler_cb, NULL, /* handler callback */
5847 : : MHD_OPTION_EXTERNAL_LOGGER,
5848 : : error_cb, NULL,
5849 : : MHD_OPTION_SOCK_ADDR,
5850 : : (struct sockaddr *) &http_sockaddr,
5851 : : MHD_OPTION_THREAD_POOL_SIZE,
5852 : : (int)connection_pool,
5853 : : MHD_OPTION_END);
5854 : : }
5855 : : else
5856 : : {
5857 : : // Start httpd server threads. Use a single dual-homed pool.
5858 : 84 : d46 = MHD_start_daemon (mhd_flags, http_port,
5859 : : NULL, NULL, /* default accept policy */
5860 : : handler_cb, NULL, /* handler callback */
5861 : : MHD_OPTION_EXTERNAL_LOGGER,
5862 : : error_cb, NULL,
5863 : : MHD_OPTION_THREAD_POOL_SIZE,
5864 : : (int)connection_pool,
5865 : : MHD_OPTION_END);
5866 : 84 : addr_info = "IPv4 IPv6";
5867 [ - + ]: 84 : if (d46 == NULL)
5868 : : {
5869 : : // Cannot use dual_stack, use ipv4 only
5870 : 0 : mhd_flags &= ~(MHD_USE_DUAL_STACK);
5871 [ # # ]: 0 : d4 = MHD_start_daemon (mhd_flags, http_port,
5872 : : NULL, NULL, /* default accept policy */
5873 : : handler_cb, NULL, /* handler callback */
5874 : : MHD_OPTION_EXTERNAL_LOGGER,
5875 : : error_cb, NULL,
5876 : : (connection_pool
5877 : : ? MHD_OPTION_THREAD_POOL_SIZE
5878 : : : MHD_OPTION_END),
5879 : : (connection_pool
5880 : : ? (int)connection_pool
5881 : : : MHD_OPTION_END),
5882 : : MHD_OPTION_END);
5883 : 0 : addr_info = "IPv4";
5884 : : }
5885 : : }
5886 [ - + - - ]: 84 : if (d4 == NULL && d46 == NULL && dsa == NULL)
5887 : : {
5888 : 0 : sqlite3 *database = db;
5889 : 0 : sqlite3 *databaseq = dbq;
5890 : 0 : db = dbq = 0; // for signal_handler not to freak
5891 : 0 : sqlite3_close (databaseq);
5892 : 0 : sqlite3_close (database);
5893 : 0 : error (EXIT_FAILURE, 0, "cannot start http server on %s port %d",
5894 : : addr_info.c_str(), http_port);
5895 : : }
5896 : :
5897 [ + - ]: 168 : obatched(clog) << "started http server on "
5898 : : << addr_info
5899 [ + - ]: 84 : << " port=" << http_port
5900 [ + - + + : 162 : << (webapi_cors ? " with cors" : "")
+ - + - ]
5901 : 84 : << endl;
5902 : :
5903 : : // add maxigroom sql if -G given
5904 [ - + ]: 84 : if (maxigroom)
5905 : : {
5906 [ # # ]: 0 : obatched(clog) << "maxigrooming database, please wait." << endl;
5907 : : // NB: this index alone can nearly double the database size!
5908 : : // NB: this index would be necessary to run source-file metadata searches fast
5909 : 0 : extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
5910 : 0 : extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
5911 : 0 : extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
5912 : :
5913 : : // NB: we don't maxigroom the _files interning table. It'd require a temp index on all the
5914 : : // tables that have file foreign-keys, which is a lot.
5915 : :
5916 : : // NB: with =delete, may take up 3x disk space total during vacuum process
5917 : : // vs. =off (only 2x but may corrupt database if program dies mid-vacuum)
5918 : : // vs. =wal (>3x observed, but safe)
5919 : 0 : extra_ddl.push_back("pragma journal_mode=delete;");
5920 : 0 : extra_ddl.push_back("vacuum;");
5921 : 0 : extra_ddl.push_back("pragma journal_mode=wal;");
5922 : : }
5923 : :
5924 : : // run extra -D sql if given
5925 [ + + ]: 84 : if (! passive_p)
5926 [ - + ]: 82 : for (auto&& i: extra_ddl)
5927 : : {
5928 [ # # ]: 0 : if (verbose > 1)
5929 [ # # # # ]: 0 : obatched(clog) << "extra ddl:\n" << i << endl;
5930 : 0 : rc = sqlite3_exec (db, i.c_str(), NULL, NULL, NULL);
5931 [ # # # # ]: 0 : if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
5932 : 0 : error (0, 0,
5933 : : "warning: cannot run database extra ddl %s: %s", i.c_str(), sqlite3_errmsg(db));
5934 : :
5935 [ # # ]: 0 : if (maxigroom)
5936 [ # # ]: 0 : obatched(clog) << "maxigroomed database" << endl;
5937 : : }
5938 : :
5939 [ + + ]: 84 : if (! passive_p)
5940 [ + - + - ]: 164 : obatched(clog) << "search concurrency " << concurrency << endl;
5941 : 84 : obatched(clog) << "webapi connection pool " << connection_pool
5942 [ + - - + : 84 : << (connection_pool ? "" : " (unlimited)") << endl;
+ - + - ]
5943 [ + + ]: 84 : if (! passive_p) {
5944 [ + - + - ]: 164 : obatched(clog) << "rescan time " << rescan_s << endl;
5945 [ + - + - ]: 164 : obatched(clog) << "scan checkpoint " << scan_checkpoint << endl;
5946 : : }
5947 [ + - + - ]: 168 : obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
5948 [ + - + - ]: 168 : obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
5949 [ + - + - ]: 168 : obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
5950 [ + - + - ]: 168 : obatched(clog) << "fdcache tmpdir min% " << fdcache_mintmp << endl;
5951 [ + + ]: 84 : if (! passive_p)
5952 [ + - + - ]: 164 : obatched(clog) << "groom time " << groom_s << endl;
5953 [ + - + - ]: 168 : obatched(clog) << "forwarded ttl limit " << forwarded_ttl_limit << endl;
5954 : :
5955 [ + + ]: 84 : if (scan_archives.size()>0)
5956 : : {
5957 : 56 : obatched ob(clog);
5958 [ + - ]: 56 : auto& o = ob << "accepting archive types ";
5959 [ + + ]: 174 : for (auto&& arch : scan_archives)
5960 [ + - + - : 118 : o << arch.first << "(" << arch.second << ") ";
+ - + - ]
5961 [ + - ]: 56 : o << endl;
5962 : 56 : }
5963 : 84 : const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
5964 [ + + + + ]: 84 : if (du && du[0] != '\0') // set to non-empty string?
5965 [ + - + - ]: 32 : obatched(clog) << "upstream debuginfod servers: " << du << endl;
5966 : :
5967 [ + + ]: 84 : vector<pthread_t> all_threads;
5968 : :
5969 [ + + ]: 84 : if (! passive_p)
5970 : : {
5971 : 82 : pthread_t pt;
5972 : 82 : rc = pthread_create (& pt, NULL, thread_main_groom, NULL);
5973 [ - + ]: 82 : if (rc)
5974 : 0 : error (EXIT_FAILURE, rc, "cannot spawn thread to groom database\n");
5975 : : else
5976 : : {
5977 : : #ifdef HAVE_PTHREAD_SETNAME_NP
5978 : 82 : (void) pthread_setname_np (pt, "groom");
5979 : : #endif
5980 [ + - ]: 82 : all_threads.push_back(pt);
5981 : : }
5982 : :
5983 [ + + + + ]: 82 : if (scan_files || scan_archives.size() > 0)
5984 : : {
5985 [ + - ]: 72 : if (scan_checkpoint > 0)
5986 [ + - ]: 72 : scan_barrier = new sqlite_checkpoint_pb(concurrency, (unsigned) scan_checkpoint);
5987 : :
5988 : 72 : rc = pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL);
5989 [ - + ]: 72 : if (rc)
5990 : 0 : error (EXIT_FAILURE, rc, "cannot spawn thread to traverse source paths\n");
5991 : : #ifdef HAVE_PTHREAD_SETNAME_NP
5992 : 72 : (void) pthread_setname_np (pt, "traverse");
5993 : : #endif
5994 [ + - ]: 72 : all_threads.push_back(pt);
5995 : :
5996 [ + + ]: 360 : for (unsigned i=0; i<concurrency; i++)
5997 : : {
5998 : 288 : rc = pthread_create (& pt, NULL, thread_main_scanner, NULL);
5999 [ - + ]: 288 : if (rc)
6000 : 0 : error (EXIT_FAILURE, rc, "cannot spawn thread to scan source files / archives\n");
6001 : : #ifdef HAVE_PTHREAD_SETNAME_NP
6002 : 288 : (void) pthread_setname_np (pt, "scan");
6003 : : #endif
6004 [ + - ]: 288 : all_threads.push_back(pt);
6005 : : }
6006 : : }
6007 : : }
6008 : :
6009 : : /* Trivial main loop! */
6010 [ + - + - ]: 84 : set_metric("ready", 1);
6011 [ + + ]: 232 : while (! interrupted)
6012 [ + - ]: 148 : pause ();
6013 [ + - ]: 84 : scanq.nuke(); // wake up any remaining scanq-related threads, let them die
6014 [ + + + - ]: 84 : if (scan_barrier) scan_barrier->nuke(); // ... in case they're stuck in a barrier
6015 [ + - + - ]: 84 : set_metric("ready", 0);
6016 : :
6017 [ + - ]: 84 : if (verbose)
6018 [ + - + - : 168 : obatched(clog) << "stopping" << endl;
- - ]
6019 : :
6020 : : /* Join all our threads. */
6021 [ + + ]: 526 : for (auto&& it : all_threads)
6022 [ + - ]: 442 : pthread_join (it, NULL);
6023 : :
6024 : : /* Stop all the web service threads. */
6025 [ - + - - ]: 84 : if (dsa) MHD_stop_daemon (dsa);
6026 [ + - + - ]: 84 : if (d46) MHD_stop_daemon (d46);
6027 [ - + - - ]: 84 : if (d4) MHD_stop_daemon (d4);
6028 : :
6029 [ + + ]: 84 : if (! passive_p)
6030 : : {
6031 : : /* With all threads known dead, we can clean up the global resources. */
6032 [ + - ]: 82 : rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_CLEANUP_DDL, NULL, NULL, NULL);
6033 [ - + ]: 82 : if (rc != SQLITE_OK)
6034 : : {
6035 [ # # ]: 0 : error (0, 0,
6036 : : "warning: cannot run database cleanup ddl: %s", sqlite3_errmsg(db));
6037 : : }
6038 : : }
6039 : :
6040 [ + - ]: 84 : debuginfod_pool_groom ();
6041 [ + + ]: 84 : delete scan_barrier;
6042 : :
6043 : : // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
6044 [ + - ]: 84 : (void) regfree (& file_include_regex);
6045 [ + - ]: 84 : (void) regfree (& file_exclude_regex);
6046 : :
6047 : 84 : sqlite3 *database = db;
6048 : 84 : sqlite3 *databaseq = dbq;
6049 : 84 : db = dbq = 0; // for signal_handler not to freak
6050 [ + - ]: 84 : (void) sqlite3_close (databaseq);
6051 [ + + ]: 84 : if (! passive_p)
6052 [ + - ]: 82 : (void) sqlite3_close (database);
6053 : :
6054 [ + + ]: 84 : return 0;
6055 : 84 : }
|