Branch data Line data Source code
1 : : /* Debuginfo-over-http server.
2 : : Copyright (C) 2019-2024 Red Hat, Inc.
3 : : Copyright (C) 2021, 2022 Mark J. Wielaard <mark@klomp.org>
4 : : This file is part of elfutils.
5 : :
6 : : This file is free software; you can redistribute it and/or modify
7 : : it under the terms of the GNU General Public License as published by
8 : : the Free Software Foundation; either version 3 of the License, or
9 : : (at your option) any later version.
10 : :
11 : : elfutils is distributed in the hope that it will be useful, but
12 : : WITHOUT ANY WARRANTY; without even the implied warranty of
13 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : : GNU General Public License for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 : :
19 : :
20 : : /* cargo-cult from libdwfl linux-kernel-modules.c */
21 : : /* In case we have a bad fts we include this before config.h because it
22 : : can't handle _FILE_OFFSET_BITS.
23 : : Everything we need here is fine if its declarations just come first.
24 : : Also, include sys/types.h before fts. On some systems fts.h is not self
25 : : contained. */
26 : : #ifdef BAD_FTS
27 : : #include <sys/types.h>
28 : : #include <fts.h>
29 : : #endif
30 : :
31 : : #ifdef HAVE_CONFIG_H
32 : : #include "config.h"
33 : : #endif
34 : :
35 : : // #define _GNU_SOURCE
36 : : #ifdef HAVE_SCHED_H
37 : : extern "C" {
38 : : #include <sched.h>
39 : : }
40 : : #endif
41 : : #ifdef HAVE_SYS_RESOURCE_H
42 : : extern "C" {
43 : : #include <sys/resource.h>
44 : : }
45 : : #endif
46 : :
47 : : #ifdef HAVE_EXECINFO_H
48 : : extern "C" {
49 : : #include <execinfo.h>
50 : : }
51 : : #endif
52 : : #ifdef HAVE_MALLOC_H
53 : : extern "C" {
54 : : #include <malloc.h>
55 : : }
56 : : #endif
57 : :
58 : : #include "debuginfod.h"
59 : : #include <dwarf.h>
60 : :
61 : : #include <argp.h>
62 : : #ifdef __GNUC__
63 : : #undef __attribute__ /* glibc bug - rhbz 1763325 */
64 : : #endif
65 : :
66 : : #include <unistd.h>
67 : : #include <stdlib.h>
68 : : #include <locale.h>
69 : : #include <pthread.h>
70 : : #include <signal.h>
71 : : #include <sys/stat.h>
72 : : #include <sys/time.h>
73 : : #include <sys/vfs.h>
74 : : #include <unistd.h>
75 : : #include <fcntl.h>
76 : : #include <netdb.h>
77 : : #include <math.h>
78 : : #include <float.h>
79 : : #include <fnmatch.h>
80 : :
81 : :
82 : : /* If fts.h is included before config.h, its indirect inclusions may not
83 : : give us the right LFS aliases of these functions, so map them manually. */
84 : : #ifdef BAD_FTS
85 : : #ifdef _FILE_OFFSET_BITS
86 : : #define open open64
87 : : #define fopen fopen64
88 : : #endif
89 : : #else
90 : : #include <sys/types.h>
91 : : #include <fts.h>
92 : : #endif
93 : :
94 : : #include <cstring>
95 : : #include <vector>
96 : : #include <set>
97 : : #include <unordered_set>
98 : : #include <map>
99 : : #include <string>
100 : : #include <iostream>
101 : : #include <iomanip>
102 : : #include <ostream>
103 : : #include <sstream>
104 : : #include <mutex>
105 : : #include <deque>
106 : : #include <condition_variable>
107 : : #include <exception>
108 : : #include <thread>
109 : : // #include <regex> // on rhel7 gcc 4.8, not competent
110 : : #include <regex.h>
111 : : // #include <algorithm>
112 : : using namespace std;
113 : :
114 : : #include <gelf.h>
115 : : #include <libdwelf.h>
116 : :
117 : : #include <microhttpd.h>
118 : :
119 : : #if MHD_VERSION >= 0x00097002
120 : : // libmicrohttpd 0.9.71 broke API
121 : : #define MHD_RESULT enum MHD_Result
122 : : #else
123 : : #define MHD_RESULT int
124 : : #endif
125 : :
126 : : #ifdef ENABLE_IMA_VERIFICATION
127 : : #include <rpm/rpmlib.h>
128 : : #include <rpm/rpmfi.h>
129 : : #include <rpm/header.h>
130 : : #include <glob.h>
131 : : #endif
132 : :
133 : : #include <curl/curl.h>
134 : : #include <archive.h>
135 : : #include <archive_entry.h>
136 : : #include <sqlite3.h>
137 : :
138 : : #ifdef __linux__
139 : : #include <sys/syscall.h>
140 : : #endif
141 : :
142 : : #ifdef __linux__
143 : : #define tid() syscall(SYS_gettid)
144 : : #else
145 : : #define tid() pthread_self()
146 : : #endif
147 : :
148 : : extern "C" {
149 : : #include "printversion.h"
150 : : #include "system.h"
151 : : }
152 : : #include <json-c/json.h>
153 : :
154 : :
// Return true iff the trailing characters of HAYSTACK are exactly NEEDLE.
// An empty NEEDLE matches any HAYSTACK; a NEEDLE longer than HAYSTACK
// never matches.
inline bool
string_endswith(const std::string& haystack, const std::string& needle)
{
  const std::string::size_type hay_len = haystack.size();
  const std::string::size_type tail_len = needle.size();

  if (tail_len > hay_len)
    return false;

  // Compare only the last tail_len characters of haystack against needle.
  return haystack.compare(hay_len - tail_len, tail_len, needle) == 0;
}
162 : :
163 : :
164 : : // Roll this identifier for every sqlite schema incompatibility.
// It is pasted as a name prefix into every table/view/index name in the DDL.
165 : : #define BUILDIDS "buildids10"
166 : :
// Table-definition suffix: expands to "without rowid" when compiled against
// SQLITE_VERSION_NUMBER >= 3008000, and to the empty string otherwise.
167 : : #if SQLITE_VERSION_NUMBER >= 3008000
168 : : #define WITHOUT_ROWID "without rowid"
169 : : #else
170 : : #define WITHOUT_ROWID ""
171 : : #endif
172 : :
// SQL text for the index database.  In order it contains:
//   1. connection pragmas,
//   2. "create ... if not exists" statements for the BUILDIDS-prefixed
//      tables, indexes and views,
//   3. "drop ... if exists" statements for objects left behind by earlier
//      schema generations (buildids9 back to the original buildids).
// NOTE(review): the code that executes this string is outside this chunk.
173 : : static const char DEBUGINFOD_SQLITE_DDL[] =
174 : : "pragma foreign_keys = on;\n"
175 : : "pragma synchronous = 0;\n" // disable fsync()s - this cache is disposable across a machine crash
176 : : "pragma journal_mode = wal;\n" // https://sqlite.org/wal.html
177 : : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
178 : : "pragma journal_size_limit = 0;\n" // limit steady state file (between grooming, which also =truncate's)
179 : : "pragma auto_vacuum = incremental;\n" // https://sqlite.org/pragma.html
180 : : "pragma busy_timeout = 1000;\n" // https://sqlite.org/pragma.html
181 : : // NB: all these are overridable with -D option
182 : :
183 : : // Normalization table for interning file names
184 : : "create table if not exists " BUILDIDS "_fileparts (\n"
185 : : " id integer primary key not null,\n"
186 : : " name text unique not null\n"
187 : : " );\n"
188 : : "create table if not exists " BUILDIDS "_files (\n"
189 : : " id integer primary key not null,\n"
190 : : " dirname integer not null,\n"
191 : : " basename integer not null,\n"
192 : : " unique (dirname, basename),\n"
193 : : " foreign key (dirname) references " BUILDIDS "_fileparts(id) on delete cascade,\n"
194 : : " foreign key (basename) references " BUILDIDS "_fileparts(id) on delete cascade\n"
195 : : " );\n"
196 : : "create view if not exists " BUILDIDS "_files_v as\n" // view rebuilding the full name as dirname || '/' || basename
197 : : " select f.id, n1.name || '/' || n2.name as name\n"
198 : : " from " BUILDIDS "_files f, " BUILDIDS "_fileparts n1, " BUILDIDS "_fileparts n2\n"
199 : : " where f.dirname = n1.id and f.basename = n2.id;\n"
200 : :
201 : : // Normalization table for interning buildids
202 : : "create table if not exists " BUILDIDS "_buildids (\n"
203 : : " id integer primary key not null,\n"
204 : : " hex text unique not null);\n"
205 : : // Track the completion of scanning of a given file & sourcetype at given time
206 : : "create table if not exists " BUILDIDS "_file_mtime_scanned (\n"
207 : : " mtime integer not null,\n"
208 : : " file integer not null,\n"
209 : : " size integer not null,\n" // in bytes
210 : : " sourcetype text(1) not null\n"
211 : : " check (sourcetype IN ('F', 'R')),\n"
212 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
213 : : " primary key (file, mtime, sourcetype)\n"
214 : : " ) " WITHOUT_ROWID ";\n"
215 : : "create table if not exists " BUILDIDS "_f_de (\n"
216 : : " buildid integer not null,\n"
217 : : " debuginfo_p integer not null,\n"
218 : : " executable_p integer not null,\n"
219 : : " file integer not null,\n"
220 : : " mtime integer not null,\n"
221 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
222 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
223 : : " primary key (buildid, file, mtime)\n"
224 : : " ) " WITHOUT_ROWID ";\n"
225 : : // Index for faster delete by file identifier and metadata searches
226 : : "create index if not exists " BUILDIDS "_f_de_idx on " BUILDIDS "_f_de (file, mtime);\n"
227 : : "create table if not exists " BUILDIDS "_f_s (\n"
228 : : " buildid integer not null,\n"
229 : : " artifactsrc integer not null,\n"
230 : : " file integer not null,\n" // NB: not necessarily entered into _mtime_scanned
231 : : " mtime integer not null,\n"
232 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
233 : : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
234 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
235 : : " primary key (buildid, artifactsrc, file, mtime)\n"
236 : : " ) " WITHOUT_ROWID ";\n"
237 : : "create table if not exists " BUILDIDS "_r_de (\n"
238 : : " buildid integer not null,\n"
239 : : " debuginfo_p integer not null,\n"
240 : : " executable_p integer not null,\n"
241 : : " file integer not null,\n"
242 : : " mtime integer not null,\n"
243 : : " content integer not null,\n"
244 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
245 : : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
246 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
247 : : " primary key (buildid, debuginfo_p, executable_p, file, content, mtime)\n"
248 : : " ) " WITHOUT_ROWID ";\n"
249 : : // Index for faster delete by archive file identifier
250 : : "create index if not exists " BUILDIDS "_r_de_idx on " BUILDIDS "_r_de (file, mtime);\n"
251 : : // Index for metadata searches
252 : : "create index if not exists " BUILDIDS "_r_de_idx2 on " BUILDIDS "_r_de (content);\n"
253 : : "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
254 : : " buildid integer not null,\n"
255 : : " artifactsrc integer not null,\n"
256 : : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
257 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
258 : : " primary key (buildid, artifactsrc)\n"
259 : : " ) " WITHOUT_ROWID ";\n"
260 : : "create table if not exists " BUILDIDS "_r_sdef (\n" // rpm contents that may satisfy sref
261 : : " file integer not null,\n"
262 : : " mtime integer not null,\n"
263 : : " content integer not null,\n"
264 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
265 : : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
266 : : " primary key (content, file, mtime)\n"
267 : : " ) " WITHOUT_ROWID ";\n"
268 : : // create views to glue together some of the above tables, for webapi D queries
269 : : "create view if not exists " BUILDIDS "_query_d as \n"
270 : : "select\n"
271 : : " b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
272 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_f_de n\n"
273 : : " where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n"
274 : : "union all select\n"
275 : : " b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
276 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_r_de n\n"
277 : : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n"
278 : : ";"
279 : : // ... and for E queries
280 : : "create view if not exists " BUILDIDS "_query_e as \n"
281 : : "select\n"
282 : : " b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
283 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_f_de n\n"
284 : : " where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n"
285 : : "union all select\n"
286 : : " b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
287 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_r_de n\n"
288 : : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n"
289 : : ";"
290 : : // ... and for S queries
291 : : "create view if not exists " BUILDIDS "_query_s as \n"
292 : : "select\n"
293 : : " b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n"
294 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v fs, " BUILDIDS "_f_s n\n"
295 : : " where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n"
296 : : "union all select\n"
297 : : " b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n"
298 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_files_v fsref, "
299 : : " " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n"
300 : : " where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n"
301 : : " and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n"
302 : : ";"
303 : : // and for startup overview counts
304 : : "drop view if exists " BUILDIDS "_stats;\n"
305 : : "create view if not exists " BUILDIDS "_stats as\n"
306 : : " select 'file d/e' as label,count(*) as quantity from " BUILDIDS "_f_de\n"
307 : : "union all select 'file s',count(*) from " BUILDIDS "_f_s\n"
308 : : "union all select 'archive d/e',count(*) from " BUILDIDS "_r_de\n"
309 : : "union all select 'archive sref',count(*) from " BUILDIDS "_r_sref\n"
310 : : "union all select 'archive sdef',count(*) from " BUILDIDS "_r_sdef\n"
311 : : "union all select 'buildids',count(*) from " BUILDIDS "_buildids\n"
312 : : "union all select 'filenames',count(*) from " BUILDIDS "_files\n"
313 : : "union all select 'fileparts',count(*) from " BUILDIDS "_fileparts\n"
314 : : "union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n"
315 : : "union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n"
316 : : #if SQLITE_VERSION_NUMBER >= 3016000
317 : : "union all select 'index db size (mb)',page_count*page_size/1024/1024 as size FROM pragma_page_count(), pragma_page_size()\n"
318 : : #endif
319 : : ";\n"
320 : :
321 : : // schema change history & garbage collection
322 : : //
323 : : // XXX: we could have migration queries here to bring prior-schema
324 : : // data over instead of just dropping it. But that could incur
325 : : // doubled storage costs.
326 : : //
327 : : // buildids10: split the _files table into _parts
328 : : "" // <<< we are here
329 : : // buildids9: widen the mtime_scanned table
330 : : "DROP VIEW IF EXISTS buildids9_stats;\n"
331 : : "DROP INDEX IF EXISTS buildids9_r_de_idx;\n"
332 : : "DROP INDEX IF EXISTS buildids9_f_de_idx;\n"
333 : : "DROP VIEW IF EXISTS buildids9_query_s;\n"
334 : : "DROP VIEW IF EXISTS buildids9_query_e;\n"
335 : : "DROP VIEW IF EXISTS buildids9_query_d;\n"
336 : : "DROP TABLE IF EXISTS buildids9_r_sdef;\n"
337 : : "DROP TABLE IF EXISTS buildids9_r_sref;\n"
338 : : "DROP TABLE IF EXISTS buildids9_r_de;\n"
339 : : "DROP TABLE IF EXISTS buildids9_f_s;\n"
340 : : "DROP TABLE IF EXISTS buildids9_f_de;\n"
341 : : "DROP TABLE IF EXISTS buildids9_file_mtime_scanned;\n"
342 : : "DROP TABLE IF EXISTS buildids9_buildids;\n"
343 : : "DROP TABLE IF EXISTS buildids9_files;\n"
344 : : // buildids8: slim the sref table
345 : : "drop table if exists buildids8_f_de;\n"
346 : : "drop table if exists buildids8_f_s;\n"
347 : : "drop table if exists buildids8_r_de;\n"
348 : : "drop table if exists buildids8_r_sref;\n"
349 : : "drop table if exists buildids8_r_sdef;\n"
350 : : "drop table if exists buildids8_file_mtime_scanned;\n"
351 : : "drop table if exists buildids8_files;\n"
352 : : "drop table if exists buildids8_buildids;\n"
353 : : // buildids7: separate _norm table into dense subtype tables
354 : : "drop table if exists buildids7_f_de;\n"
355 : : "drop table if exists buildids7_f_s;\n"
356 : : "drop table if exists buildids7_r_de;\n"
357 : : "drop table if exists buildids7_r_sref;\n"
358 : : "drop table if exists buildids7_r_sdef;\n"
359 : : "drop table if exists buildids7_file_mtime_scanned;\n"
360 : : "drop table if exists buildids7_files;\n"
361 : : "drop table if exists buildids7_buildids;\n"
362 : : // buildids6: drop bolo/rfolo again, represent sources / rpmcontents in main table
363 : : "drop table if exists buildids6_norm;\n"
364 : : "drop table if exists buildids6_files;\n"
365 : : "drop table if exists buildids6_buildids;\n"
366 : : "drop view if exists buildids6;\n"
367 : : // buildids5: redefine srcfile1 column to be '.'-less (for rpms)
368 : : "drop table if exists buildids5_norm;\n"
369 : : "drop table if exists buildids5_files;\n"
370 : : "drop table if exists buildids5_buildids;\n"
371 : : "drop table if exists buildids5_bolo;\n"
372 : : "drop table if exists buildids5_rfolo;\n"
373 : : "drop view if exists buildids5;\n"
374 : : // buildids4: introduce rpmfile RFOLO
375 : : "drop table if exists buildids4_norm;\n"
376 : : "drop table if exists buildids4_files;\n"
377 : : "drop table if exists buildids4_buildids;\n"
378 : : "drop table if exists buildids4_bolo;\n"
379 : : "drop table if exists buildids4_rfolo;\n"
380 : : "drop view if exists buildids4;\n"
381 : : // buildids3*: split out srcfile BOLO
382 : : "drop table if exists buildids3_norm;\n"
383 : : "drop table if exists buildids3_files;\n"
384 : : "drop table if exists buildids3_buildids;\n"
385 : : "drop table if exists buildids3_bolo;\n"
386 : : "drop view if exists buildids3;\n"
387 : : // buildids2: normalized buildid and filenames into interning tables;
388 : : "drop table if exists buildids2_norm;\n"
389 : : "drop table if exists buildids2_files;\n"
390 : : "drop table if exists buildids2_buildids;\n"
391 : : "drop view if exists buildids2;\n"
392 : : // buildids1: made buildid and artifacttype NULLable, to represent cached-negative
393 : : // lookups from sources, e.g. files or rpms that contain no buildid-indexable content
394 : : "drop table if exists buildids1;\n"
395 : : // buildids: original
396 : : "drop table if exists buildids;\n"
397 : : ;
398 : :
// SQL text that truncates the write-ahead log via a wal_checkpoint pragma.
// NOTE(review): presumably run when the database is closed down; the call
// site is outside this chunk -- confirm.
399 : : static const char DEBUGINFOD_SQLITE_CLEANUP_DDL[] =
400 : : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
401 : : ;
402 : :
403 : :
404 : :
405 : :
406 : : /* Name and version of program. */
407 : : ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
408 : :
409 : : /* Bug report address. */
410 : : ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
411 : :
412 : : /* Definitions of arguments for argp functions.
   Each entry is { long name, key, argument name, flags, help text, group }.
   Keys that are printable characters double as the short option letter;
   the ARGP_KEY_* / ARGP_SCAN_* keys (0x1001 and up) are defined inline next
   to their entry and give long-only options.  All keys are dispatched in
   parse_opt().  Entries flagged OPTION_HIDDEN are still accepted but
   parse_opt() treats them as deprecated no-ops.  The all-NULL entry
   terminates the table.  */
413 : : static const struct argp_option options[] =
414 : : {
415 : : { NULL, 0, NULL, 0, "Scanners:", 1 },
416 : : { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning.", 0 },
417 : : { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning.", 0 },
418 : : { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning.", 0 },
419 : : { "scan-archive", 'Z', "EXT=CMD", 0, "Enable arbitrary archive scanning.", 0 },
420 : : // "source-oci-imageregistry" ...
421 : :
422 : : { NULL, 0, NULL, 0, "Options:", 2 },
423 : : { "logical", 'L', NULL, 0, "Follow symlinks, default=ignore.", 0 },
424 : : { "rescan-time", 't', "SECONDS", 0, "Number of seconds to wait between rescans, 0=disable.", 0 },
425 : : { "groom-time", 'g', "SECONDS", 0, "Number of seconds to wait between database grooming, 0=disable.", 0 },
426 : : { "maxigroom", 'G', NULL, 0, "Run a complete database groom/shrink pass at startup.", 0 },
427 : : { "concurrency", 'c', "NUM", 0, "Limit scanning thread concurrency to NUM, default=#CPUs.", 0 },
428 : : { "connection-pool", 'C', "NUM", OPTION_ARG_OPTIONAL,
429 : : "Use webapi connection pool with NUM threads, default=unlim.", 0 },
430 : : { "include", 'I', "REGEX", 0, "Include files matching REGEX, default=all.", 0 },
431 : : { "exclude", 'X', "REGEX", 0, "Exclude files matching REGEX, default=none.", 0 },
432 : : { "port", 'p', "NUM", 0, "HTTP port to listen on, default 8002.", 0 },
433 : : { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
434 : : { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
435 : : { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
436 : : { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X arguments to groom the database.",0},
437 : : #define ARGP_KEY_FDCACHE_FDS 0x1001
438 : : { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", OPTION_HIDDEN, NULL, 0 },
439 : : #define ARGP_KEY_FDCACHE_MBS 0x1002
440 : : { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
441 : : #define ARGP_KEY_FDCACHE_PREFETCH 0x1003
442 : : { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
443 : : #define ARGP_KEY_FDCACHE_MINTMP 0x1004
444 : : { "fdcache-mintmp", ARGP_KEY_FDCACHE_MINTMP, "NUM", 0, "Minimum free space% on tmpdir.", 0 },
445 : : #define ARGP_KEY_FDCACHE_PREFETCH_MBS 0x1005
446 : : { "fdcache-prefetch-mbs", ARGP_KEY_FDCACHE_PREFETCH_MBS, "MB", OPTION_HIDDEN, NULL, 0},
447 : : #define ARGP_KEY_FDCACHE_PREFETCH_FDS 0x1006
448 : : { "fdcache-prefetch-fds", ARGP_KEY_FDCACHE_PREFETCH_FDS, "NUM", OPTION_HIDDEN, NULL, 0},
449 : : #define ARGP_KEY_FORWARDED_TTL_LIMIT 0x1007
450 : : {"forwarded-ttl-limit", ARGP_KEY_FORWARDED_TTL_LIMIT, "NUM", 0, "Limit of X-Forwarded-For hops, default 8.", 0},
451 : : #define ARGP_KEY_PASSIVE 0x1008
452 : : { "passive", ARGP_KEY_PASSIVE, NULL, 0, "Do not scan or groom, read-only database.", 0 },
453 : : #define ARGP_KEY_DISABLE_SOURCE_SCAN 0x1009
454 : : { "disable-source-scan", ARGP_KEY_DISABLE_SOURCE_SCAN, NULL, 0, "Do not scan dwarf source info.", 0 },
455 : : #define ARGP_SCAN_CHECKPOINT 0x100A
456 : : { "scan-checkpoint", ARGP_SCAN_CHECKPOINT, "NUM", 0, "Number of files scanned before a WAL checkpoint.", 0 },
457 : : #ifdef ENABLE_IMA_VERIFICATION
458 : : #define ARGP_KEY_KOJI_SIGCACHE 0x100B
459 : : { "koji-sigcache", ARGP_KEY_KOJI_SIGCACHE, NULL, 0, "Do a koji specific mapping of rpm paths to get IMA signatures.", 0 },
460 : : #endif
461 : : #define ARGP_KEY_METADATA_MAXTIME 0x100C
462 : : { "metadata-maxtime", ARGP_KEY_METADATA_MAXTIME, "SECONDS", 0,
463 : : "Number of seconds to limit metadata query run time, 0=unlimited.", 0 },
464 : : { NULL, 0, NULL, 0, NULL, 0 },
465 : : };
466 : :
467 : : /* Short description of program. */
468 : : static const char doc[] = "Serve debuginfo-related content across HTTP from files under PATHs.";
469 : :
470 : : /* Strings for arguments in help texts. */
471 : : static const char args_doc[] = "[PATH ...]";
472 : :
473 : : /* Prototype for option handler. */
474 : : static error_t parse_opt (int key, char *arg, struct argp_state *state);
475 : :
/* Forward declaration; its result is the initial value of the
   `concurrency' setting.  The definition is outside this chunk.  */
476 : : static unsigned default_concurrency();
477 : :
478 : : /* Data structure to communicate with argp functions.
   Ties together the option table, the parse_opt callback and the two
   help strings above; the remaining three members are left NULL.  */
479 : : static struct argp argp =
480 : : {
481 : : options, parse_opt, args_doc, doc, NULL, NULL, NULL
482 : : };
483 : :
484 : :
// Process-wide configuration and state.  Unless noted otherwise each value
// is filled in by parse_opt() from the command-line option named alongside.
485 : : static string db_path; // -d FILE; ":memory:" is rewritten to a shared-cache URI
486 : : static sqlite3 *db; // single connection, serialized across all our threads!
487 : : static sqlite3 *dbq; // webapi query-servicing readonly connection, serialized ditto!
488 : : static unsigned verbose; // incremented once per -v
// NOTE(review): the five sig_atomic_t flags/counters below are presumably
// written from signal handlers; the handlers are outside this chunk.
489 : : static volatile sig_atomic_t interrupted = 0;
490 : : static volatile sig_atomic_t forced_rescan_count = 0;
491 : : static volatile sig_atomic_t sigusr1 = 0;
492 : : static volatile sig_atomic_t forced_groom_count = 0;
493 : : static volatile sig_atomic_t sigusr2 = 0;
494 : : static unsigned http_port = 8002; // -p NUM, accepted range 1..65535
495 : : static unsigned rescan_s = 300; // -t SECONDS
496 : : static unsigned groom_s = 86400; // -g SECONDS
497 : : static bool maxigroom = false; // -G
498 : : static unsigned concurrency = default_concurrency(); // -c NUM, raised to at least 1
499 : : static int connection_pool = 0; // -C NUM (minimum 2); stays 0 when NUM is not given
500 : : static set<string> source_paths; // positional PATH arguments
501 : : static bool scan_files = false; // -F
502 : : static map<string,string> scan_archives; // file extension -> command string, from -R / -U / -Z
503 : : static vector<string> extra_ddl; // one entry per -D SQL
504 : : static regex_t file_include_regex; // -I REGEX, compiled with REG_EXTENDED|REG_NOSUB
505 : : static regex_t file_exclude_regex; // -X REGEX, compiled with REG_EXTENDED|REG_NOSUB
506 : : static bool regex_groom = false; // -r
507 : : static bool traverse_logical; // -L
508 : : static long fdcache_mbs; // --fdcache-mbs MB
509 : : static long fdcache_prefetch; // --fdcache-prefetch NUM
510 : : static long fdcache_mintmp; // --fdcache-mintmp NUM, accepted range 0..100
511 : : static unsigned forwarded_ttl_limit = 8; // --forwarded-ttl-limit NUM
512 : : static bool scan_source_info = true; // cleared by --disable-source-scan
513 : : static string tmpdir; // not set by parse_opt(); assigned outside this chunk
514 : : static bool passive_p = false; // --passive
515 : : static long scan_checkpoint = 256; // --scan-checkpoint NUM, must be >= 0
516 : : #ifdef ENABLE_IMA_VERIFICATION
517 : : static bool requires_koji_sigcache_mapping = false; // --koji-sigcache
518 : : #endif
519 : : static unsigned metadata_maxtime_s = 5; // --metadata-maxtime SECONDS
520 : :
// Forward declarations of the metric helpers (definitions are outside this
// chunk).  Each of set_/inc_/add_metric comes in overloads that identify the
// metric by a bare name, or by a name plus one (lname, lvalue) label pair;
// inc_/add_metric additionally accept a second (rname, rvalue) pair.
// The set_ and add_ forms take the numeric value as their last argument.
521 : : static void set_metric(const string& key, double value);
522 : : static void inc_metric(const string& key);
523 : : static void add_metric(const string& metric,
524 : : double value);
525 : : static void set_metric(const string& metric,
526 : : const string& lname, const string& lvalue,
527 : : double value);
528 : : static void inc_metric(const string& metric,
529 : : const string& lname, const string& lvalue);
530 : : static void add_metric(const string& metric,
531 : : const string& lname, const string& lvalue,
532 : : double value);
533 : : static void inc_metric(const string& metric,
534 : : const string& lname, const string& lvalue,
535 : : const string& rname, const string& rvalue);
536 : : static void add_metric(const string& metric,
537 : : const string& lname, const string& lvalue,
538 : : const string& rname, const string& rvalue,
539 : : double value);
540 : :
541 : :
542 : : class tmp_inc_metric { // a RAII style wrapper for exception-safe scoped increment & decrement
543 : : string m, n, v;
544 : : public:
545 : 2133 : tmp_inc_metric(const string& mname, const string& lname, const string& lvalue):
546 [ + - + - ]: 2133 : m(mname), n(lname), v(lvalue)
547 : : {
548 [ + - ]: 2133 : add_metric (m, n, v, 1);
549 [ - - - - : 2133 : }
- - ]
550 : 2133 : ~tmp_inc_metric()
551 : : {
552 : 2133 : add_metric (m, n, v, -1);
553 [ - + - + : 2133 : }
- + ]
554 : : };
555 : :
556 : : class tmp_ms_metric { // a RAII style wrapper for exception-safe scoped timing
557 : : string m, n, v;
558 : : struct timespec ts_start;
559 : : public:
560 : 101379 : tmp_ms_metric(const string& mname, const string& lname, const string& lvalue):
561 [ + - + - ]: 101379 : m(mname), n(lname), v(lvalue)
562 : : {
563 : 101381 : clock_gettime (CLOCK_MONOTONIC, & ts_start);
564 [ - - - - ]: 101395 : }
565 : 101404 : ~tmp_ms_metric()
566 : : {
567 : 101404 : struct timespec ts_end;
568 : 101404 : clock_gettime (CLOCK_MONOTONIC, & ts_end);
569 : 101406 : double deltas = (ts_end.tv_sec - ts_start.tv_sec)
570 : 101406 : + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
571 : :
572 [ + - ]: 101406 : add_metric (m + "_milliseconds_sum", n, v, (deltas*1000.0));
573 [ + + ]: 101406 : inc_metric (m + "_milliseconds_count", n, v);
574 [ + + - + : 178329 : }
- + ]
575 : : };
576 : :
577 : :
578 : : /* Handle program arguments. */
579 : : static error_t
580 : 1176 : parse_opt (int key, char *arg,
581 : : struct argp_state *state __attribute__ ((unused)))
582 : : {
583 : 1176 : int rc;
584 [ + + + + : 1176 : switch (key)
+ + + + +
- + + - -
+ + + + +
+ + + + +
- + - + ]
585 : : {
586 : 290 : case 'v': verbose ++; break;
587 : 76 : case 'd':
588 : : /* When using the in-memory database make sure it is shareable,
589 : : so we can open it twice as read/write and read-only. */
590 [ + + ]: 76 : if (strcmp (arg, ":memory:") == 0)
591 : 1190 : db_path = "file::memory:?cache=shared";
592 : : else
593 [ + - ]: 124 : db_path = string(arg);
594 : : break;
595 : 76 : case 'p': http_port = (unsigned) atoi(arg);
596 [ + - ]: 76 : if (http_port == 0 || http_port > 65535)
597 : 0 : argp_failure(state, 1, EINVAL, "port number");
598 : : break;
599 : 48 : case 'F': scan_files = true; break;
600 : 24 : case 'R':
601 [ + - + - : 24 : scan_archives[".rpm"]="cat"; // libarchive groks rpm natively
- + ]
602 : 24 : break;
603 : 16 : case 'U':
604 [ + - + - : 16 : scan_archives[".deb"]="(bsdtar -O -x -f - data.tar\\*)<";
- + ]
605 [ + - + - : 16 : scan_archives[".ddeb"]="(bsdtar -O -x -f - data.tar\\*)<";
- + ]
606 [ + - + - : 16 : scan_archives[".ipk"]="(bsdtar -O -x -f - data.tar\\*)<";
- + ]
607 : : // .udeb too?
608 : 16 : break;
609 : 38 : case 'Z':
610 : 38 : {
611 [ - + ]: 38 : char* extension = strchr(arg, '=');
612 [ - + ]: 38 : if (arg[0] == '\0')
613 : 0 : argp_failure(state, 1, EINVAL, "missing EXT");
614 [ + + ]: 38 : else if (extension)
615 [ + - + - : 20 : scan_archives[string(arg, (extension-arg))]=string(extension+1);
- + - + -
- ]
616 : : else
617 [ + - + - : 18 : scan_archives[string(arg)]=string("cat");
- + - + -
- ]
618 : : }
619 : : break;
620 : 8 : case 'L':
621 [ - + ]: 8 : if (passive_p)
622 : 0 : argp_failure(state, 1, EINVAL, "-L option inconsistent with passive mode");
623 : 8 : traverse_logical = true;
624 : 8 : break;
625 : 0 : case 'D':
626 [ # # ]: 0 : if (passive_p)
627 : 0 : argp_failure(state, 1, EINVAL, "-D option inconsistent with passive mode");
628 [ # # ]: 0 : extra_ddl.push_back(string(arg));
629 : 0 : break;
630 : 62 : case 't':
631 [ - + ]: 62 : if (passive_p)
632 : 0 : argp_failure(state, 1, EINVAL, "-t option inconsistent with passive mode");
633 : 62 : rescan_s = (unsigned) atoi(arg);
634 : 62 : break;
635 : 62 : case 'g':
636 [ - + ]: 62 : if (passive_p)
637 : 0 : argp_failure(state, 1, EINVAL, "-g option inconsistent with passive mode");
638 : 62 : groom_s = (unsigned) atoi(arg);
639 : 62 : break;
640 : 0 : case 'G':
641 [ # # ]: 0 : if (passive_p)
642 : 0 : argp_failure(state, 1, EINVAL, "-G option inconsistent with passive mode");
643 : 0 : maxigroom = true;
644 : 0 : break;
645 : 0 : case 'c':
646 [ # # ]: 0 : if (passive_p)
647 : 0 : argp_failure(state, 1, EINVAL, "-c option inconsistent with passive mode");
648 : 0 : concurrency = (unsigned) atoi(arg);
649 [ # # ]: 0 : if (concurrency < 1) concurrency = 1;
650 : : break;
651 : 6 : case 'C':
652 [ + + ]: 6 : if (arg)
653 : : {
654 : 4 : connection_pool = atoi(arg);
655 [ + - ]: 4 : if (connection_pool < 2)
656 : 0 : argp_failure(state, 1, EINVAL, "-C NUM minimum 2");
657 : : }
658 : : break;
659 : 4 : case 'I':
660 : : // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
661 [ - + ]: 4 : if (passive_p)
662 : 0 : argp_failure(state, 1, EINVAL, "-I option inconsistent with passive mode");
663 : 4 : regfree (&file_include_regex);
664 : 4 : rc = regcomp (&file_include_regex, arg, REG_EXTENDED|REG_NOSUB);
665 [ + - ]: 4 : if (rc != 0)
666 : 0 : argp_failure(state, 1, EINVAL, "regular expression");
667 : : break;
668 : 6 : case 'X':
669 [ - + ]: 6 : if (passive_p)
670 : 0 : argp_failure(state, 1, EINVAL, "-X option inconsistent with passive mode");
671 : 6 : regfree (&file_exclude_regex);
672 : 6 : rc = regcomp (&file_exclude_regex, arg, REG_EXTENDED|REG_NOSUB);
673 [ + - ]: 6 : if (rc != 0)
674 : 0 : argp_failure(state, 1, EINVAL, "regular expression");
675 : : break;
676 : 4 : case 'r':
677 [ - + ]: 4 : if (passive_p)
678 : 0 : argp_failure(state, 1, EINVAL, "-r option inconsistent with passive mode");
679 : 4 : regex_groom = true;
680 : 4 : break;
681 : : case ARGP_KEY_FDCACHE_FDS:
682 : : // deprecated
683 : : break;
684 : 4 : case ARGP_KEY_FDCACHE_MBS:
685 : 4 : fdcache_mbs = atol (arg);
686 : 4 : break;
687 : 2 : case ARGP_KEY_FDCACHE_PREFETCH:
688 : 2 : fdcache_prefetch = atol (arg);
689 : 2 : break;
690 : 2 : case ARGP_KEY_FDCACHE_MINTMP:
691 : 2 : fdcache_mintmp = atol (arg);
692 [ + - ]: 2 : if( fdcache_mintmp > 100 || fdcache_mintmp < 0 )
693 : 0 : argp_failure(state, 1, EINVAL, "fdcache mintmp percent");
694 : : break;
695 : 4 : case ARGP_KEY_FORWARDED_TTL_LIMIT:
696 : 4 : forwarded_ttl_limit = (unsigned) atoi(arg);
697 : 4 : break;
698 : 104 : case ARGP_KEY_ARG:
699 [ + - ]: 104 : source_paths.insert(string(arg));
700 : 104 : break;
701 : : case ARGP_KEY_FDCACHE_PREFETCH_FDS:
702 : : // deprecated
703 : : break;
704 : : case ARGP_KEY_FDCACHE_PREFETCH_MBS:
705 : : // deprecated
706 : : break;
707 : 2 : case ARGP_KEY_PASSIVE:
708 : 2 : passive_p = true;
709 [ + - ]: 2 : if (source_paths.size() > 0
710 [ + - ]: 2 : || maxigroom
711 [ + - ]: 2 : || extra_ddl.size() > 0
712 [ + - + - ]: 4 : || traverse_logical)
713 : : // other conflicting options tricky to check
714 : 0 : argp_failure(state, 1, EINVAL, "inconsistent options with passive mode");
715 : : break;
716 : 0 : case ARGP_KEY_DISABLE_SOURCE_SCAN:
717 : 0 : scan_source_info = false;
718 : 0 : break;
719 : 2 : case ARGP_SCAN_CHECKPOINT:
720 : 2 : scan_checkpoint = atol (arg);
721 [ + - ]: 2 : if (scan_checkpoint < 0)
722 : 0 : argp_failure(state, 1, EINVAL, "scan checkpoint");
723 : : break;
724 : 0 : case ARGP_KEY_METADATA_MAXTIME:
725 : 0 : metadata_maxtime_s = (unsigned) atoi(arg);
726 : 0 : break;
727 : : #ifdef ENABLE_IMA_VERIFICATION
728 : : case ARGP_KEY_KOJI_SIGCACHE:
729 : : requires_koji_sigcache_mapping = true;
730 : : break;
731 : : #endif
732 : : // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
733 : : default: return ARGP_ERR_UNKNOWN;
734 : : }
735 : :
736 : : return 0;
737 : : }
738 : :
739 : :
740 : : ////////////////////////////////////////////////////////////////////////
741 : :
742 : :
743 : : static void add_mhd_response_header (struct MHD_Response *r,
744 : : const char *h, const char *v);
745 : :
746 : : // represent errors that may get reported to an ostream and/or a libmicrohttpd connection
747 : :
748 : 8 : struct reportable_exception
749 : : {
750 : : int code;
751 : : string message;
752 : :
753 [ - - + - : 86 : reportable_exception(int c, const string& m): code(c), message(m) {}
- - + - +
- ]
754 [ - - - - : 604 : reportable_exception(const string& m): code(503), message(m) {}
- - - - -
- - - + -
- - - - +
- - - - -
- - + - -
- ]
755 : : reportable_exception(): code(503), message() {}
756 : :
757 : : void report(ostream& o) const; // defined under obatched() class below
758 : :
759 : 634 : MHD_RESULT mhd_send_response(MHD_Connection* c) const {
760 : 1268 : MHD_Response* r = MHD_create_response_from_buffer (message.size(),
761 : 634 : (void*) message.c_str(),
762 : : MHD_RESPMEM_MUST_COPY);
763 : 634 : add_mhd_response_header (r, "Content-Type", "text/plain");
764 : 634 : MHD_RESULT rc = MHD_queue_response (c, code, r);
765 : 634 : MHD_destroy_response (r);
766 : 634 : return rc;
767 : : }
768 : : };
769 : :
770 : :
771 : : struct sqlite_exception: public reportable_exception
772 : : {
773 : 0 : sqlite_exception(int rc, const string& msg):
774 [ # # # # : 0 : reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) {
# # # # #
# # # # #
# # # # #
# ]
775 [ # # # # : 0 : inc_metric("error_count","sqlite3",sqlite3_errstr(rc));
# # # # #
# # # # #
# # # # ]
776 [ # # ]: 0 : }
777 : : };
778 : :
779 [ + - - - ]: 4 : struct libc_exception: public reportable_exception
780 : : {
781 : 594 : libc_exception(int rc, const string& msg):
782 [ - + + - : 2376 : reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
+ - + - +
- - + - +
- + + - -
- ]
783 [ + - + - : 1188 : inc_metric("error_count","libc",strerror(rc));
+ - + - -
+ + - - -
- - ]
784 [ - - ]: 594 : }
785 : : };
786 : :
787 : :
788 : : struct archive_exception: public reportable_exception
789 : : {
790 : 0 : archive_exception(const string& msg):
791 [ # # # # : 0 : reportable_exception(string("libarchive error: ") + msg) {
# # ]
792 [ # # # # : 0 : inc_metric("error_count","libarchive",msg);
# # # # #
# ]
793 [ # # ]: 0 : }
794 : 0 : archive_exception(struct archive* a, const string& msg):
795 [ # # # # : 0 : reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
# # # # #
# # # # #
# # # # #
# ]
796 [ # # # # : 0 : inc_metric("error_count","libarchive",msg + ": " + string(archive_error_string(a) ?: "?"));
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
797 [ # # ]: 0 : }
798 : : };
799 : :
800 : :
801 : : struct elfutils_exception: public reportable_exception
802 : : {
803 : 0 : elfutils_exception(int rc, const string& msg):
804 [ # # # # : 0 : reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
# # # # #
# # # # #
# # # # #
# ]
805 [ # # # # : 0 : inc_metric("error_count","elfutils",elf_errmsg(rc));
# # # # #
# # # # #
# # # # ]
806 [ # # ]: 0 : }
807 : : };
808 : :
809 : :
810 : : ////////////////////////////////////////////////////////////////////////
811 : :
// A de-duplicating work queue coordinating three kinds of threads:
// producers (push_back), consumers (wait_front / done_front) and idlers
// (wait_idle / done_idle).  Consumers only proceed while no idler is
// active; idlers only proceed once the queue is empty and no consumer is
// still busy.  nuke() releases every waiter.
template <typename Payload>
class workq
{
  unordered_set<Payload> q; // pending items; a set, so duplicates are eliminated
  mutex mtx;                // held by every member function while touching state
  condition_variable cv;    // notified on state changes that may unblock a waiter
  bool dead;                // set by nuke()
  unsigned idlers;          // incremented by wait_idle(), decremented by done_idle()
  unsigned fronters;        // incremented by wait_front(), decremented by done_front()

  // Export the current queue length; caller holds mtx.
  void publish_pending()
  {
    set_metric("thread_work_pending","role","scan", q.size());
  }

public:
  workq(): dead(false), idlers(0), fronters(0) {}
  ~workq() {}

  // Add P to the queue (no-op if an equal item is already pending) and
  // wake all waiters.
  void push_back(const Payload& p)
  {
    lock_guard<mutex> guard(mtx);
    q.insert (p);
    publish_pending();
    cv.notify_all();
  }

  // kill this workqueue, wake up all idlers / scanners
  void nuke()
  {
    lock_guard<mutex> guard(mtx);
    // optional: q.clear();
    dead = true;
    cv.notify_all();
  }

  // clear the workqueue, when scanning is interrupted with USR2
  void clear()
  {
    lock_guard<mutex> guard(mtx);
    q.clear();
    publish_pending();
    // NB: there may still be some live fronters
    cv.notify_all(); // maybe wake up waiting idlers
  }

  // block this scanner thread until there is work to do and no active idler;
  // returns false (leaving P untouched) once the queue has been nuked
  bool wait_front (Payload& p)
  {
    unique_lock<mutex> lock(mtx);
    cv.wait(lock, [this] { return dead || (q.size() != 0 && idlers == 0); });
    if (dead)
      return false;

    auto first = q.begin();
    p = *first;
    q.erase (first);
    fronters ++; // prevent idlers from starting awhile, even if empty q
    publish_pending();
    // NB: don't wake up idlers yet!  The consumer is busy
    // processing this element until it calls done_front().
    return true;
  }

  // notify workq that scanner thread is done with that last item
  void done_front ()
  {
    lock_guard<mutex> guard(mtx);
    fronters --;
    const bool drained = (q.size() == 0 && fronters == 0);
    if (drained)
      cv.notify_all(); // maybe wake up waiting idlers
  }

  // block this idler thread until there is no work to do
  void wait_idle ()
  {
    unique_lock<mutex> lock(mtx);
    cv.notify_all(); // maybe wake up waiting scanners
    cv.wait(lock, [this] { return dead || (q.size() == 0 && fronters == 0); });
    idlers ++;
  }

  // this idler thread is finished; scanners may run again
  void done_idle ()
  {
    lock_guard<mutex> guard(mtx);
    idlers --;
    cv.notify_all(); // maybe wake up waiting scanners, but probably not (shutting down)
  }
};
897 : :
// Work item type for the scan queue: a string key paired with a struct stat.
// Ordering, hashing and equality below all look at the string only, so two
// payloads with the same string are treated as duplicates.
typedef struct stat stat_t;
typedef pair<string,stat_t> scan_payload;
inline bool operator< (const scan_payload& a, const scan_payload& b)
{
  return a.first < b.first; // don't bother compare the stat fields
}

namespace std { // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56480
  // Hash a payload by its string component only.
  template<> struct hash<::scan_payload>
  {
    std::size_t operator() (const ::scan_payload& p) const noexcept
    {
      return hash<string>()(p.first);
    }
  };
  // Payloads are equal when their string components are equal.
  template<> struct equal_to<::scan_payload>
  {
    // a predicate: yields bool (not size_t)
    bool operator() (const ::scan_payload& a, const ::scan_payload& b) const noexcept
    {
      return a.first == b.first;
    }
  };
}
921 : :
922 : : static workq<scan_payload> scanq; // just a single one
923 : : // producer & idler: thread_main_fts_source_paths()
924 : : // consumer: thread_main_scanner()
925 : : // idler: thread_main_groom()
926 : :
927 : :
928 : : ////////////////////////////////////////////////////////////////////////
929 : :
930 : : // Unique set is a thread-safe structure that lends 'ownership' of a value
931 : : // to a thread. Other threads requesting the same thing are made to wait.
932 : : // It's like a semaphore-on-demand.
template <typename T>
class unique_set
{
private:
  set<T> values;         // values currently held by some caller
  mutex mtx;             // guards VALUES
  condition_variable cv; // notified every time a value is released
public:
  unique_set() {}
  ~unique_set() {}

  // Block until VALUE is not held by anyone, then record it as held.
  void acquire(const T& value)
  {
    unique_lock<mutex> lock(mtx);
    cv.wait(lock, [&] { return values.find(value) == values.end(); });
    values.insert(value);
  }

  // Drop VALUE from the held set and wake every waiter so each can
  // re-check whether its own value is now free.
  void release(const T& value)
  {
    lock_guard<mutex> guard(mtx);
    // assert (values.find(value) != values.end());
    values.erase(value);
    cv.notify_all();
  }
};
960 : :
961 : :
962 : : // This is the object that's instantiated to uniquely hold a value in a
963 : : // RAII-pattern way.
964 : : template <typename T>
965 : : class unique_set_reserver
966 : : {
967 : : private:
968 : : unique_set<T>& please_hold;
969 : : T mine;
970 : : public:
971 : 1388 : unique_set_reserver(unique_set<T>& t, const T& value):
972 [ + - - - ]: 1388 : please_hold(t), mine(value) { please_hold.acquire(mine); }
973 [ + - ]: 1388 : ~unique_set_reserver() { please_hold.release(mine); }
974 : : };
975 : :
976 : :
977 : : ////////////////////////////////////////////////////////////////////////
978 : :
979 : : // periodic_barrier is a concurrency control object that lets N threads
980 : : // periodically (based on counter value) agree to wait at a barrier,
981 : : // let one of them carry out some work, then be set free
982 : :
class periodic_barrier
{
private:
  unsigned period;     // number of count() reports to trigger barrier activation
  unsigned threads;    // number of threads participating
  mutex mtx;           // protects all the following fields
  unsigned counter;    // count of count() reports in the current generation
  unsigned generation; // barrier activation generation
  unsigned waiting;    // number of threads waiting for barrier
  bool dead;           // set by nuke(); releases anybody blocked in count()
  condition_variable cv;
public:
  periodic_barrier(unsigned t, unsigned p):
    period(p), threads(t), counter(0), generation(0), waiting(0), dead(false) { }
  virtual ~periodic_barrier() {}

  // The work carried out by one thread while the others wait.
  virtual void periodic_barrier_work() noexcept = 0;

  // Mark the barrier dead and wake every thread blocked in count().
  void nuke()
  {
    lock_guard<mutex> guard(mtx);
    dead = true;
    cv.notify_all();
  }

  // Report one unit of progress.  The call that brings the counter to
  // PERIOD-1 becomes the "doer": it waits for the other THREADS-1 threads
  // to arrive, runs periodic_barrier_work(), then starts a new generation.
  // Calls arriving while the doer is in charge block until that happens.
  void count()
  {
    unique_lock<mutex> lock(mtx);
    const unsigned entry_generation = generation;
    const unsigned last_tick = period-1;

    // normal case: counter just freely running
    if (counter < last_tick)
      {
        counter ++;
        return;
      }

    // we're the doer
    if (counter == last_tick)
      {
        counter = period; // entering barrier holding phase
        cv.notify_all();
        cv.wait(lock, [this] { return waiting >= threads-1 || dead; });
        // all other threads are now stuck in the barrier
        periodic_barrier_work(); // NB: we're holding the mutex the whole time
        // reset for next barrier, releasing other waiters
        counter = 0;
        generation ++;
        cv.notify_all();
        return;
      }

    // we're a waiter, in holding phase
    if (counter == period)
      {
        waiting ++;
        cv.notify_all();
        cv.wait(lock, [&] {
          return counter != period || generation != entry_generation || dead;
        });
        waiting --;
      }
  }
};
1040 : :
1041 : :
1042 : :
1043 : : ////////////////////////////////////////////////////////////////////////
1044 : :
1045 : :
1046 : : // Print a standard timestamp.
1047 : : static ostream&
1048 : 20169 : timestamp (ostream &o)
1049 : : {
1050 : 20169 : char datebuf[80];
1051 : 20169 : char *now2 = NULL;
1052 : 20169 : time_t now_t = time(NULL);
1053 : 20168 : struct tm now;
1054 : 20168 : struct tm *nowp = gmtime_r (&now_t, &now);
1055 [ + - ]: 20168 : if (nowp)
1056 : : {
1057 : 20168 : (void) strftime (datebuf, sizeof (datebuf), "%c", nowp);
1058 : 20168 : now2 = datebuf;
1059 : : }
1060 : :
1061 : 20168 : return o << "[" << (now2 ? now2 : "") << "] "
1062 [ - + ]: 20168 : << "(" << getpid () << "/" << tid() << "): ";
1063 : : }
1064 : :
1065 : :
1066 : : // A little class that impersonates an ostream to the extent that it can
1067 : : // take << streaming operations. It batches up the bits into an internal
1068 : : // stringstream until it is destroyed; then flushes to the original ostream.
1069 : : // It adds a timestamp
1070 : : class obatched
1071 : : {
1072 : : private:
1073 : : ostream& o;
1074 : : stringstream stro;
1075 : : static mutex lock;
1076 : : public:
1077 : 20169 : obatched(ostream& oo, bool timestamp_p = true): o(oo)
1078 : : {
1079 [ + - ]: 20169 : if (timestamp_p)
1080 [ + - ]: 20169 : timestamp(stro);
1081 : 20166 : }
1082 : 20165 : ~obatched()
1083 : : {
1084 : 20165 : unique_lock<mutex> do_not_cross_the_streams(obatched::lock);
1085 [ + - ]: 20169 : o << stro.str();
1086 : 20169 : o.flush();
1087 : 20169 : }
1088 : : operator ostream& () { return stro; }
1089 [ - - + - : 14675 : template <typename T> ostream& operator << (const T& t) { stro << t; return stro; }
+ - + - +
- + - + -
- - - - -
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - + -
- - + - +
- + - + -
+ - + - +
- + - - -
+ - + - +
- + - + -
+ - + - +
- + - - -
+ - - - +
- + - + -
- - - - -
- + - - -
+ - - - -
- - - + -
- - + - -
- + - - -
- - + - +
- + - + -
+ - + - -
- - - ]
1090 : : };
1091 : : mutex obatched::lock; // just the one, since cout/cerr iostreams are not thread-safe
1092 : :
1093 : :
1094 : 696 : void reportable_exception::report(ostream& o) const {
1095 [ + - + - ]: 696 : obatched(o) << message << endl;
1096 : 696 : }
1097 : :
1098 : :
1099 : : ////////////////////////////////////////////////////////////////////////
1100 : :
1101 : :
1102 : : // RAII style sqlite prepared-statement holder that matches { } block lifetime
1103 : :
1104 : : struct sqlite_ps
1105 : : {
1106 : : private:
1107 : : sqlite3* db;
1108 : : const string nickname;
1109 : : const string sql;
1110 : : sqlite3_stmt *pp;
1111 : :
1112 : : sqlite_ps(const sqlite_ps&); // make uncopyable
1113 : : sqlite_ps& operator=(const sqlite_ps &); // make unassignable
1114 : :
1115 : : public:
1116 [ + - - - ]: 6959 : sqlite_ps (sqlite3* d, const string& n, const string& s): db(d), nickname(n), sql(s) {
1117 : : // tmp_ms_metric tick("sqlite3","prep",nickname);
1118 [ + + ]: 6959 : if (verbose > 4)
1119 [ + - + - : 360 : obatched(clog) << nickname << " prep " << sql << endl;
+ - + - +
- - - ]
1120 [ + - ]: 6959 : int rc = sqlite3_prepare_v2 (db, sql.c_str(), -1 /* to \0 */, & this->pp, NULL);
1121 [ - + ]: 6960 : if (rc != SQLITE_OK)
1122 [ # # # # ]: 0 : throw sqlite_exception(rc, "prepare " + sql);
1123 : 6960 : }
1124 : :
1125 : 55685 : sqlite_ps& reset()
1126 : : {
1127 [ + - + - : 111371 : tmp_ms_metric tick("sqlite3","reset",nickname);
- + - - ]
1128 [ + - ]: 55686 : sqlite3_reset(this->pp);
1129 : 55686 : return *this;
1130 : 55686 : }
1131 : :
1132 : 59433 : sqlite_ps& bind(int parameter, const string& str)
1133 : : {
1134 [ + + ]: 59433 : if (verbose > 4)
1135 [ + - + - : 1417 : obatched(clog) << nickname << " bind " << parameter << "=" << str << endl;
+ - + - +
- + - ]
1136 : 59434 : int rc = sqlite3_bind_text (this->pp, parameter, str.c_str(), -1, SQLITE_TRANSIENT);
1137 [ - + ]: 59431 : if (rc != SQLITE_OK)
1138 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 bind");
1139 : 59431 : return *this;
1140 : : }
1141 : :
1142 : 18181 : sqlite_ps& bind(int parameter, int64_t value)
1143 : : {
1144 [ + + ]: 18181 : if (verbose > 4)
1145 [ + - + - : 636 : obatched(clog) << nickname << " bind " << parameter << "=" << value << endl;
+ - + - +
- + - ]
1146 : 18181 : int rc = sqlite3_bind_int64 (this->pp, parameter, value);
1147 [ - + ]: 18182 : if (rc != SQLITE_OK)
1148 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 bind");
1149 : 18182 : return *this;
1150 : : }
1151 : :
1152 : : sqlite_ps& bind(int parameter)
1153 : : {
1154 : : if (verbose > 4)
1155 : : obatched(clog) << nickname << " bind " << parameter << "=" << "NULL" << endl;
1156 : : int rc = sqlite3_bind_null (this->pp, parameter);
1157 : : if (rc != SQLITE_OK)
1158 : : throw sqlite_exception(rc, "sqlite3 bind");
1159 : : return *this;
1160 : : }
1161 : :
1162 : :
1163 : 32787 : void step_ok_done() {
1164 [ + - + - : 65574 : tmp_ms_metric tick("sqlite3","step_done",nickname);
- + - - ]
1165 [ + - ]: 32787 : int rc = sqlite3_step (this->pp);
1166 [ + + ]: 32788 : if (verbose > 4)
1167 [ + - + - : 844 : obatched(clog) << nickname << " step-ok-done(" << sqlite3_errstr(rc) << ") " << sql << endl;
+ - + - +
- + - + -
+ - ]
1168 [ + + - + ]: 32788 : if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
1169 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 step");
1170 [ + - ]: 32788 : (void) sqlite3_reset (this->pp);
1171 : 32788 : }
1172 : :
1173 : :
1174 : 12930 : int step() {
1175 [ + - + - : 25861 : tmp_ms_metric tick("sqlite3","step",nickname);
- + - - ]
1176 [ + - ]: 12931 : int rc = sqlite3_step (this->pp);
1177 [ + + ]: 12932 : if (verbose > 4)
1178 [ + - + - : 378 : obatched(clog) << nickname << " step(" << sqlite3_errstr(rc) << ") " << sql << endl;
+ - + - +
- + - + -
+ - ]
1179 : 12932 : return rc;
1180 : 12932 : }
1181 : :
1182 [ + + + + ]: 13769 : ~sqlite_ps () { sqlite3_finalize (this->pp); }
1183 [ + - + - : 2640 : operator sqlite3_stmt* () { return this->pp; }
+ - + - +
- + - ]
1184 : : };
1185 : :
1186 : :
1187 : : ////////////////////////////////////////////////////////////////////////
1188 : :
1189 : :
1190 : : struct sqlite_checkpoint_pb: public periodic_barrier
1191 : : {
1192 : : // NB: don't use sqlite_ps since it can throw exceptions during ctor etc.
1193 : 68 : sqlite_checkpoint_pb(unsigned t, unsigned p):
1194 : 136 : periodic_barrier(t, p) { }
1195 : :
1196 : 30 : void periodic_barrier_work() noexcept
1197 : : {
1198 : 30 : (void) sqlite3_exec (db, "pragma wal_checkpoint(truncate);", NULL, NULL, NULL);
1199 : 30 : }
1200 : : };
1201 : :
1202 : : static periodic_barrier* scan_barrier = 0; // initialized in main()
1203 : :
1204 : :
1205 : : ////////////////////////////////////////////////////////////////////////
1206 : :
1207 : : // RAII style templated autocloser
1208 : :
// Calls a cleanup function on a stored value when the object is destroyed.
// Payload is the stored value's type; Ignore is the cleanup function's
// return type, whose value is discarded.
template <class Payload, class Ignore>
struct defer_dtor
{
public:
  typedef Ignore (*dtor_fn) (Payload);

private:
  Payload payload; // value handed to the cleanup function
  dtor_fn cleanup; // invoked exactly once, from the destructor

public:
  defer_dtor(Payload _p, dtor_fn _fn): payload(_p), cleanup(_fn) {}
  ~defer_dtor() { (void) cleanup(payload); }

  defer_dtor(const defer_dtor<Payload,Ignore>&) = delete; // make uncopyable
  defer_dtor& operator=(const defer_dtor<Payload,Ignore> &) = delete; // make unassignable
};
1227 : :
1228 : :
1229 : :
1230 : : ////////////////////////////////////////////////////////////////////////
1231 : :
1232 : :
// Return a copy of STR keeping only alphanumeric characters and the
// punctuation / . , _ : -- everything else is dropped.
static string
header_censor(const string& str)
{
  string y;
  for (auto&& x : str)
    {
      // The <ctype.h> classifiers are only defined for unsigned char values
      // (and EOF); a plain char holding a byte >= 0x80 may be negative, so
      // convert before classifying.
      if (isalnum((unsigned char) x) || x == '/' || x == '.' || x == ',' || x == '_' || x == ':')
        y += x;
    }
  return y;
}
1244 : :
1245 : :
1246 : : static string
1247 : 2139 : conninfo (struct MHD_Connection * conn)
1248 : : {
1249 : 2139 : char hostname[256]; // RFC1035
1250 : 2139 : char servname[256];
1251 : 2139 : int sts = -1;
1252 : :
1253 [ - + ]: 2139 : if (conn == 0)
1254 : 0 : return "internal";
1255 : :
1256 : : /* Look up client address data. */
1257 : 2139 : const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
1258 : : MHD_CONNECTION_INFO_CLIENT_ADDRESS);
1259 [ + - ]: 2139 : struct sockaddr *so = u ? u->client_addr : 0;
1260 : :
1261 [ + - - + ]: 2139 : if (so && so->sa_family == AF_INET) {
1262 : 0 : sts = getnameinfo (so, sizeof (struct sockaddr_in),
1263 : : hostname, sizeof (hostname),
1264 : : servname, sizeof (servname),
1265 : : NI_NUMERICHOST | NI_NUMERICSERV);
1266 [ + - ]: 2139 : } else if (so && so->sa_family == AF_INET6) {
1267 : 2139 : struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
1268 [ + - + - : 2139 : if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
- + ]
1269 : 2139 : struct sockaddr_in addr4;
1270 : 2139 : memset (&addr4, 0, sizeof(addr4));
1271 : 2139 : addr4.sin_family = AF_INET;
1272 : 2139 : addr4.sin_port = addr6->sin6_port;
1273 : 2139 : memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
1274 : 2139 : sts = getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
1275 : : hostname, sizeof (hostname),
1276 : : servname, sizeof (servname),
1277 : : NI_NUMERICHOST | NI_NUMERICSERV);
1278 : : } else {
1279 : 0 : sts = getnameinfo (so, sizeof (struct sockaddr_in6),
1280 : : hostname, sizeof (hostname),
1281 : : servname, sizeof (servname),
1282 : : NI_NUMERICHOST | NI_NUMERICSERV);
1283 : : }
1284 : : }
1285 : :
1286 [ - + ]: 2139 : if (sts != 0) {
1287 : 0 : hostname[0] = servname[0] = '\0';
1288 : : }
1289 : :
1290 : : // extract headers relevant to administration
1291 [ - + ]: 2139 : const char* user_agent = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
1292 [ + + ]: 2139 : const char* x_forwarded_for = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
1293 : : // NB: these are untrustworthy, beware if machine-processing log files
1294 : :
1295 [ + - + - : 6417 : return string(hostname) + string(":") + string(servname) +
+ - + - +
- + - - +
- + - + -
+ - + - +
- - - - -
- ]
1296 [ + - + - : 8874 : string(" UA:") + header_censor(string(user_agent)) +
+ - + - +
- - + + +
- + + + -
+ - - -
- ]
1297 [ + - + - : 6429 : string(" XFF:") + header_censor(string(x_forwarded_for));
+ - + + +
+ - - ]
1298 : : }
1299 : :
1300 : :
1301 : :
1302 : : ////////////////////////////////////////////////////////////////////////
1303 : :
1304 : : /* Wrapper for MHD_add_response_header that logs an error if we
1305 : : couldn't add the specified header. */
1306 : : static void
1307 : 6139 : add_mhd_response_header (struct MHD_Response *r,
1308 : : const char *h, const char *v)
1309 : : {
1310 [ - + ]: 6139 : if (MHD_add_response_header (r, h, v) == MHD_NO)
1311 [ # # # # : 0 : obatched(clog) << "Error: couldn't add '" << h << "' header" << endl;
# # ]
1312 : 6139 : }
1313 : :
1314 : : static void
1315 : 806 : add_mhd_last_modified (struct MHD_Response *resp, time_t mtime)
1316 : : {
1317 : 806 : struct tm now;
1318 : 806 : struct tm *nowp = gmtime_r (&mtime, &now);
1319 [ + - ]: 806 : if (nowp != NULL)
1320 : : {
1321 : 806 : char datebuf[80];
1322 : 806 : size_t rc = strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %T GMT",
1323 : : nowp);
1324 [ + - ]: 806 : if (rc > 0 && rc < sizeof (datebuf))
1325 : 806 : add_mhd_response_header (resp, "Last-Modified", datebuf);
1326 : : }
1327 : :
1328 : 806 : add_mhd_response_header (resp, "Cache-Control", "public");
1329 : 806 : }
1330 : :
1331 : : // quote all questionable characters of str for safe passage through a sh -c expansion.
// Returns STR with a backslash inserted before every character that is
// neither alphanumeric nor '/'.
static string
shell_escape(const string& str)
{
  string y;
  for (auto&& x : str)
    {
      // The <ctype.h> classifiers are only defined for unsigned char values
      // (and EOF); a plain char holding a byte >= 0x80 may be negative, so
      // convert before classifying.
      if (! isalnum((unsigned char) x) && x != '/')
        y += "\\";
      y += x;
    }
  return y;
}
1344 : :
1345 : :
1346 : : // PR25548: Perform POSIX / RFC3986 style path canonicalization on the input string.
1347 : : //
1348 : : // Namely:
1349 : : // // -> /
1350 : : // /foo/../ -> /
1351 : : // /./ -> /
1352 : : //
1353 : : // This mapping is done on dwarf-side source path names, which may
1354 : : // include these constructs, so we can deal with debuginfod clients
1355 : : // that accidentally canonicalize the paths.
1356 : : //
1357 : : // realpath(3) is close but not quite right, because it also resolves
1358 : : // symbolic links. Symlinks at the debuginfod server have nothing to
1359 : : // do with the build-time symlinks, thus they must not be considered.
1360 : : //
1361 : : // see also curl Curl_dedotdotify() aka RFC3986, which we mostly follow here
1362 : : // see also libc __realpath()
1363 : : // see also llvm llvm::sys::path::remove_dots()
static string
canon_pathname (const string& input)
{
  string rest = input; // 5.2.4 (1): the not-yet-processed tail of the input
  string out;          // canonical path assembled so far

  // does REST begin with PREFIX?
  auto begins = [&rest](const string& prefix) {
    return rest.compare(0, prefix.size(), prefix) == 0;
  };

  while (! rest.empty())
    {
      // 5.2.4 (2) A: drop a leading "../" or "./"
      if (begins("../")) { rest.erase(0, 3); continue; }
      if (begins("./"))  { rest.erase(0, 2); continue; }

      // 5.2.4 (2) B: "/./" collapses to "/"
      if (begins("/./")) { rest.erase(0, 2); continue; }
      // no need to handle "/." complete-path-segment case; we're dealing with file names
      if (rest == "/.") { rest.clear(); continue; }

      // 5.2.4 (2) C: "/../" collapses to "/" and removes the last output segment
      if (begins("/../"))
        {
          rest.erase(0, 3);
          const string::size_type last_slash = out.rfind("/");
          if (last_slash != string::npos)
            out.erase(last_slash);
          else
            out.clear();
          continue;
        }
      // no need to handle "/.." complete-path-segment case; we're dealing with file names
      if (rest == "/..") { rest.clear(); continue; }

      // 5.2.4 (2) D
      // no need to handle these cases; we're dealing with file names
      if (rest == "." || rest == "..") { rest.clear(); continue; }

      // POSIX special: map // to /
      if (begins("//")) { rest.erase(0, 1); continue; }

      // 5.2.4 (2) E: move one segment (with its leading slash, if any)
      // from the input to the output
      const string::size_type next_slash = rest.find("/", (rest[0]=='/' ? 1 : 0)); // skip first slash
      out.append(rest, 0, next_slash);
      rest.erase(0, next_slash); // npos: consumes everything that is left
    }

  return out;
}
1419 : :
1420 : :
1421 : : // Estimate available free space for a given filesystem via statfs(2).
1422 : : // Return true if the free fraction is known to be smaller than the
1423 : : // given minimum percentage. Also update a related metric.
1424 : 2188 : bool statfs_free_enough_p(const string& path, const string& label, long minfree = 0)
1425 : : {
1426 : 2188 : struct statfs sfs;
1427 : 2188 : int rc = statfs(path.c_str(), &sfs);
1428 [ + + ]: 2187 : if (rc == 0)
1429 : : {
1430 : 2137 : double s = (double) sfs.f_bavail / (double) sfs.f_blocks;
1431 [ + - + - : 4275 : set_metric("filesys_free_ratio","purpose",label, s);
- + - - ]
1432 : 2138 : return ((s * 100.0) < minfree);
1433 : : }
1434 : : return false;
1435 : : }
1436 : :
1437 : :
1438 : :
1439 : : // A map-like class that owns a cache of file descriptors (indexed by
1440 : : // file / content names).
1441 : : //
1442 : : // If only it could use fd's instead of file names ... but we can't
1443 : : // dup(2) to create independent descriptors for the same unlinked
1444 : : // files, so would have to use some goofy linux /proc/self/fd/%d
1445 : : // hack such as the following
1446 : :
1447 : : #if 0
// Open a new descriptor for the file behind FD by re-opening its
// /proc/self/fd entry read-only.  Returns the new descriptor, or -1 on
// failure and on non-Linux builds.
int superdup(int fd)
{
#ifdef __linux__
  // A fixed buffer avoids asprintf(), whose output pointer is undefined
  // after a failure and therefore must not be passed to free().
  char fdpath[64]; // ample for "/proc/self/fd/" plus any int
  int rc = snprintf(fdpath, sizeof(fdpath), "/proc/self/fd/%d", fd);
  if (rc < 0 || (size_t) rc >= sizeof(fdpath))
    return -1;
  return open(fdpath, O_RDONLY);
#else
  (void) fd;
  return -1;
#endif
}
1464 : : #endif
1465 : :
1466 : : class libarchive_fdcache
1467 : : {
1468 : : private:
1469 : : mutex fdcache_lock;
1470 : :
1471 : : typedef pair<string,string> key; // archive, entry
1472 [ + - ]: 210 : struct fdcache_entry
1473 : : {
1474 : : string fd; // file name (probably in $TMPDIR), not an actual open fd (EMFILE)
1475 : : double fd_size_mb; // slightly rounded up megabytes
1476 : : time_t freshness; // when was this entry created or requested last
1477 : : unsigned request_count; // how many requests were made; or 0=prefetch only
1478 : : double latency; // how many seconds it took to extract the file
1479 : : };
1480 : :
1481 : : map<key,fdcache_entry> entries; // optimized for lookup
1482 : : time_t last_cleaning;
1483 : : long max_mbs;
1484 : :
1485 : : public:
1486 : 286 : void set_metrics()
1487 : : {
1488 : 286 : double fdcache_mb = 0.0;
1489 : 286 : double prefetch_mb = 0.0;
1490 : 286 : unsigned fdcache_count = 0;
1491 : 286 : unsigned prefetch_count = 0;
1492 [ + + ]: 1736 : for (auto &i : entries) {
1493 [ + + ]: 1450 : if (i.second.request_count) {
1494 : 1356 : fdcache_mb += i.second.fd_size_mb;
1495 : 1356 : fdcache_count ++;
1496 : : } else {
1497 : 94 : prefetch_mb += i.second.fd_size_mb;
1498 : 94 : prefetch_count ++;
1499 : : }
1500 : : }
1501 [ + - ]: 286 : set_metric("fdcache_bytes", fdcache_mb*1024.0*1024.0);
1502 [ + - ]: 286 : set_metric("fdcache_count", fdcache_count);
1503 [ + - ]: 286 : set_metric("fdcache_prefetch_bytes", prefetch_mb*1024.0*1024.0);
1504 [ + - ]: 286 : set_metric("fdcache_prefetch_count", prefetch_count);
1505 : 286 : }
1506 : :
1507 : 210 : void intern(const string& a, const string& b, string fd, off_t sz,
1508 : : bool requested_p, double lat)
1509 : : {
1510 : 210 : {
1511 : 210 : unique_lock<mutex> lock(fdcache_lock);
1512 : 210 : time_t now = time(NULL);
1513 : : // there is a chance it's already in here, just wasn't found last time
1514 : : // if so, there's nothing to do but count our luck
1515 [ + - ]: 210 : auto i = entries.find(make_pair(a,b));
1516 [ - + ]: 210 : if (i != entries.end())
1517 : : {
1518 [ # # # # : 0 : inc_metric("fdcache_op_count","op","redundant_intern");
# # # # #
# # # # #
# # ]
1519 [ # # ]: 0 : if (requested_p) i->second.request_count ++; // repeat prefetch doesn't count
1520 : 0 : i->second.freshness = now;
1521 : : // We need to nuke the temp file, since interning passes
1522 : : // responsibility over the path to this structure. It is
1523 : : // possible that the caller still has an fd open, but that's
1524 : : // OK.
1525 : 0 : unlink (fd.c_str());
1526 : 0 : return;
1527 : : }
1528 : 210 : double mb = (sz+65535)/1048576.0; // round up to 64K block
1529 : 210 : fdcache_entry n = { .fd=fd, .fd_size_mb=mb,
1530 : 210 : .freshness=now, .request_count = requested_p?1U:0U,
1531 [ + - + + ]: 210 : .latency=lat};
1532 [ + - + - : 210 : entries.insert(make_pair(make_pair(a,b),n));
+ - ]
1533 : :
1534 [ + + ]: 210 : if (requested_p)
1535 [ + - + - : 324 : inc_metric("fdcache_op_count","op","enqueue");
+ - + - -
+ - + - -
- - ]
1536 : : else
1537 [ + - + - : 144 : inc_metric("fdcache_op_count","op","prefetch_enqueue");
+ - + - -
+ + - - -
- - ]
1538 : :
1539 [ + + ]: 210 : if (verbose > 3)
1540 [ + - + - : 534 : obatched(clog) << "fdcache interned a=" << a << " b=" << b
- - ]
1541 [ + - + - : 178 : << " fd=" << fd << " mb=" << mb << " front=" << requested_p
+ - + - +
- + - + -
+ - ]
1542 [ + - + - : 178 : << " latency=" << lat << endl;
+ - ]
1543 : :
1544 [ + - ]: 210 : set_metrics();
1545 : 210 : }
1546 : :
1547 : : // NB: we age the cache at lookup time too
1548 [ + - - + : 210 : if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp))
- + ]
1549 : : {
1550 [ # # # # : 0 : inc_metric("fdcache_op_count","op","emerg-flush");
# # # # #
# # # #
# ]
1551 [ # # ]: 0 : obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl;
1552 : 0 : this->limit(0); // emergency flush
1553 : : }
1554 : : else // age cache normally
1555 : 210 : this->limit(max_mbs);
1556 : : }
1557 : :
1558 : 744 : int lookup(const string& a, const string& b)
1559 : : {
1560 : 744 : int fd = -1;
1561 : 744 : {
1562 : 744 : unique_lock<mutex> lock(fdcache_lock);
1563 [ + - ]: 744 : auto i = entries.find(make_pair(a,b));
1564 [ + + ]: 744 : if (i != entries.end())
1565 : : {
1566 [ + + ]: 578 : if (i->second.request_count == 0) // was a prefetch!
1567 : : {
1568 [ + - + - ]: 16 : inc_metric("fdcache_prefetch_saved_milliseconds_count");
1569 [ + - + - ]: 32 : add_metric("fdcache_prefetch_saved_milliseconds_sum", i->second.latency*1000.);
1570 : : }
1571 : 578 : i->second.request_count ++;
1572 : 578 : i->second.freshness = time(NULL);
1573 : : // brag about our success
1574 [ + - + - : 1156 : inc_metric("fdcache_op_count","op","prefetch_access"); // backward compat
+ - + - -
+ - + - -
- - ]
1575 [ + - + - ]: 578 : inc_metric("fdcache_saved_milliseconds_count");
1576 [ + - + - ]: 578 : add_metric("fdcache_saved_milliseconds_sum", i->second.latency*1000.);
1577 [ + - ]: 744 : fd = open(i->second.fd.c_str(), O_RDONLY);
1578 : : }
1579 : 0 : }
1580 : :
1581 [ + + ]: 744 : if (fd >= 0)
1582 [ + - + - : 1156 : inc_metric("fdcache_op_count","op","lookup_hit");
+ - - + -
+ - - -
- ]
1583 : : else
1584 [ + - + - : 332 : inc_metric("fdcache_op_count","op","lookup_miss");
+ - - + -
+ - - -
- ]
1585 : :
1586 : : // NB: no need to age the cache after just a lookup
1587 : :
1588 : 744 : return fd;
1589 : : }
1590 : :
1591 : 228 : int probe(const string& a, const string& b) // just a cache residency check - don't modify state, don't open
1592 : : {
1593 : 228 : unique_lock<mutex> lock(fdcache_lock);
1594 [ + - ]: 228 : auto i = entries.find(make_pair(a,b));
1595 [ + + ]: 228 : if (i != entries.end()) {
1596 [ + - + - : 52 : inc_metric("fdcache_op_count","op","probe_hit");
+ - + - -
+ - + - -
- - ]
1597 : 26 : return true;
1598 : : } else {
1599 [ + - + - : 404 : inc_metric("fdcache_op_count","op","probe_miss");
+ - + - -
+ - + - -
- - ]
1600 : 202 : return false;
1601 : : }
1602 : 228 : }
1603 : :
1604 : 0 : void clear(const string& a, const string& b)
1605 : : {
1606 : 0 : unique_lock<mutex> lock(fdcache_lock);
1607 [ # # ]: 0 : auto i = entries.find(make_pair(a,b));
1608 [ # # ]: 0 : if (i != entries.end()) {
1609 [ # # # # : 0 : inc_metric("fdcache_op_count","op",
# # # # #
# # # # #
# # # # ]
1610 [ # # ]: 0 : i->second.request_count > 0 ? "clear" : "prefetch_clear");
1611 : 0 : unlink (i->second.fd.c_str());
1612 : 0 : entries.erase(i);
1613 [ # # ]: 0 : set_metrics();
1614 : 0 : return;
1615 : : }
1616 : 0 : }
1617 : :
1618 : 362 : void limit(long maxmbs, bool metrics_p = true)
1619 : : {
1620 : 362 : time_t now = time(NULL);
1621 : :
1622 : : // avoid overly frequent limit operations
1623 [ + + + + ]: 362 : if (maxmbs > 0 && (now - this->last_cleaning) < 10) // probably not worth parametrizing
1624 : 210 : return;
1625 : 152 : this->last_cleaning = now;
1626 : :
1627 [ + + + - ]: 152 : if (verbose > 3 && (this->max_mbs != maxmbs))
1628 [ + - + - ]: 184 : obatched(clog) << "fdcache limited to maxmbs=" << maxmbs << endl;
1629 : :
1630 : 152 : unique_lock<mutex> lock(fdcache_lock);
1631 : :
1632 : 152 : this->max_mbs = maxmbs;
1633 : 152 : double total_mb = 0.0;
1634 : :
1635 : 152 : map<double, pair<string,string>> sorted_entries;
1636 [ + + ]: 362 : for (auto &i: entries)
1637 : : {
1638 : 210 : total_mb += i.second.fd_size_mb;
1639 : :
1640 : : // need a scalar quantity that combines these inputs in a sensible way:
1641 : : //
1642 : : // 1) freshness of this entry (last time it was accessed)
1643 : : // 2) size of this entry
1644 : : // 3) number of times it has been accessed (or if just prefetched with 0 accesses)
1645 : : // 4) latency it required to extract
1646 : : //
1647 : : // The lower the "score", the earlier garbage collection will
1648 : : // nuke it, so to prioritize entries for preservation, the
1649 : : // score should be higher, and vice versa.
1650 : 210 : time_t factor_1_freshness = (now - i.second.freshness); // seconds
1651 : 210 : double factor_2_size = i.second.fd_size_mb; // megabytes
1652 : 210 : unsigned factor_3_accesscount = i.second.request_count; // units
1653 : 210 : double factor_4_latency = i.second.latency; // seconds
1654 : :
1655 : : #if 0
1656 : : double score = - factor_1_freshness; // simple LRU
1657 : : #endif
1658 : :
1659 [ + + ]: 210 : double score = 0.
1660 : 210 : - log1p(factor_1_freshness) // penalize old file
1661 : 210 : - log1p(factor_2_size) // penalize large file
1662 : 210 : + factor_4_latency * factor_3_accesscount; // reward slow + repeatedly read files
1663 : :
1664 [ + + ]: 210 : if (verbose > 4)
1665 [ + - ]: 64 : obatched(clog) << "fdcache scored score=" << score
1666 [ + - + - ]: 64 : << " a=" << i.first.first << " b=" << i.first.second
1667 [ + - + - : 96 : << " f1=" << factor_1_freshness << " f2=" << factor_2_size
+ - + - +
- + - +
- ]
1668 [ + - + - : 32 : << " f3=" << factor_3_accesscount << " f4=" << factor_4_latency
+ - + - +
- ]
1669 : 32 : << endl;
1670 : :
1671 [ + - + - ]: 420 : sorted_entries.insert(make_pair(score, i.first));
1672 : : }
1673 : :
1674 : 152 : unsigned cleaned = 0;
1675 : 152 : unsigned entries_original = entries.size();
1676 : 152 : double cleaned_score_min = DBL_MAX;
1677 : 152 : double cleaned_score_max = DBL_MIN;
1678 : :
1679 : : // drop as many entries[] as needed to bring total mb down to the threshold
1680 [ + + ]: 362 : for (auto &i: sorted_entries) // in increasing score order!
1681 : : {
1682 [ - + ]: 210 : if (this->max_mbs > 0 // if this is not a "clear entire table"
1683 [ # # ]: 0 : && total_mb < this->max_mbs) // we've cleared enough to meet threshold
1684 : : break; // stop clearing
1685 : :
1686 [ - + ]: 210 : auto j = entries.find(i.second);
1687 [ - + ]: 210 : if (j == entries.end())
1688 : 0 : continue; // should not happen
1689 : :
1690 [ + + ]: 210 : if (cleaned == 0)
1691 : 34 : cleaned_score_min = i.first;
1692 : 210 : cleaned++;
1693 : 210 : cleaned_score_max = i.first;
1694 : :
1695 [ + + ]: 210 : if (verbose > 3)
1696 [ + - + - ]: 534 : obatched(clog) << "fdcache evicted score=" << i.first
1697 [ + - + - ]: 356 : << " a=" << i.second.first << " b=" << i.second.second
1698 [ + - + - : 534 : << " fd=" << j->second.fd << " mb=" << j->second.fd_size_mb
+ - + - +
- + - ]
1699 [ + - + - : 178 : << " rq=" << j->second.request_count << " lat=" << j->second.latency
+ - + - ]
1700 [ + - + - : 178 : << " fr=" << (now - j->second.freshness)
+ - ]
1701 : 178 : << endl;
1702 [ - + ]: 210 : if (metrics_p)
1703 [ # # # # : 0 : inc_metric("fdcache_op_count","op","evict");
# # # # #
# # # # #
# # ]
1704 : :
1705 : 210 : total_mb -= j->second.fd_size_mb;
1706 : 210 : unlink (j->second.fd.c_str());
1707 : 210 : entries.erase(j);
1708 : : }
1709 : :
1710 [ + + ]: 152 : if (metrics_p)
1711 [ + - + - : 152 : inc_metric("fdcache_op_count","op","evict_cycle");
+ - + - -
+ - + - -
- - ]
1712 : :
1713 [ + - + + ]: 152 : if (verbose > 1 && cleaned > 0)
1714 : : {
1715 [ + - + - : 102 : obatched(clog) << "fdcache evicted num=" << cleaned << " of=" << entries_original
+ - + - ]
1716 [ + - + - : 34 : << " min=" << cleaned_score_min << " max=" << cleaned_score_max
+ - + - +
- ]
1717 : 34 : << endl;
1718 : : }
1719 : :
1720 [ + + + - ]: 152 : if (metrics_p) set_metrics();
1721 : 152 : }
1722 : :
1723 : :
1724 : 76 : ~libarchive_fdcache()
1725 : : {
1726 : : // unlink any fdcache entries in $TMPDIR
1727 : : // don't update metrics; those globals may be already destroyed
1728 : 76 : limit(0, false);
1729 : 76 : }
1730 : : };
1731 : : static libarchive_fdcache fdcache;
1732 : :
1733 : : /* Search ELF_FD for an ELF/DWARF section with name SECTION.
1734 : : If found copy the section to a temporary file and return
1735 : : its file descriptor, otherwise return -1.
1736 : :
1737 : : The temporary file's mtime will be set to PARENT_MTIME.
1738 : : B_SOURCE should be a description of the parent file suitable
1739 : : for printing to the log. */
1740 : :
1741 : : static int
1742 : 12 : extract_section (int elf_fd, int64_t parent_mtime,
1743 : : const string& b_source, const string& section,
1744 : : const timespec& extract_begin)
1745 : : {
1746 : : /* Search the fdcache. */
1747 : 12 : struct stat fs;
1748 : 12 : int fd = fdcache.lookup (b_source, section);
1749 [ - + ]: 12 : if (fd >= 0)
1750 : : {
1751 [ # # ]: 0 : if (fstat (fd, &fs) != 0)
1752 : : {
1753 [ # # ]: 0 : if (verbose)
1754 [ # # ]: 0 : obatched (clog) << "cannot fstate fdcache "
1755 [ # # # # : 0 : << b_source << " " << section << endl;
# # ]
1756 : 0 : close (fd);
1757 : 0 : return -1;
1758 : : }
1759 [ # # ]: 0 : if ((int64_t) fs.st_mtime != parent_mtime)
1760 : : {
1761 [ # # ]: 0 : if (verbose)
1762 [ # # ]: 0 : obatched(clog) << "mtime mismatch for "
1763 [ # # # # : 0 : << b_source << " " << section << endl;
# # ]
1764 : 0 : close (fd);
1765 : 0 : return -1;
1766 : : }
1767 : : /* Success. */
1768 : : return fd;
1769 : : }
1770 : :
1771 : 12 : Elf *elf = elf_begin (elf_fd, ELF_C_READ_MMAP_PRIVATE, NULL);
1772 [ - + ]: 12 : if (elf == NULL)
1773 : : return -1;
1774 : :
1775 : : /* Try to find the section and copy the contents into a separate file. */
1776 : 12 : try
1777 : : {
1778 : 12 : size_t shstrndx;
1779 [ + - ]: 12 : int rc = elf_getshdrstrndx (elf, &shstrndx);
1780 [ - + ]: 12 : if (rc < 0)
1781 [ # # # # ]: 0 : throw elfutils_exception (rc, "getshdrstrndx");
1782 : :
1783 : : Elf_Scn *scn = NULL;
1784 : 432 : while (true)
1785 : : {
1786 [ + - ]: 222 : scn = elf_nextscn (elf, scn);
1787 [ + - ]: 222 : if (scn == NULL)
1788 : : break;
1789 : 222 : GElf_Shdr shdr_storage;
1790 [ + - ]: 222 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
1791 [ + - ]: 222 : if (shdr == NULL)
1792 : : break;
1793 : :
1794 [ + - ]: 222 : const char *scn_name = elf_strptr (elf, shstrndx, shdr->sh_name);
1795 [ + - ]: 222 : if (scn_name == NULL)
1796 : : break;
1797 [ + + ]: 222 : if (scn_name == section)
1798 : : {
1799 : 12 : Elf_Data *data = NULL;
1800 : :
1801 : : /* We found the desired section. */
1802 [ + - ]: 12 : data = elf_rawdata (scn, NULL);
1803 [ - + ]: 12 : if (data == NULL)
1804 [ # # # # : 0 : throw elfutils_exception (elf_errno (), "elfraw_data");
# # ]
1805 [ + + ]: 12 : if (data->d_buf == NULL)
1806 : : {
1807 [ + - + - ]: 8 : obatched(clog) << "section " << section
1808 [ + - + - ]: 4 : << " is empty" << endl;
1809 : 4 : break;
1810 : : }
1811 : :
1812 : : /* Create temporary file containing the section. */
1813 : 8 : char *tmppath = NULL;
1814 : 8 : rc = asprintf (&tmppath, "%s/debuginfod-section.XXXXXX", tmpdir.c_str());
1815 [ - + ]: 8 : if (rc < 0)
1816 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
1817 : 8 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
1818 [ + - ]: 8 : fd = mkstemp (tmppath);
1819 [ - + ]: 8 : if (fd < 0)
1820 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
1821 : :
1822 [ + - ]: 8 : ssize_t res = write_retry (fd, data->d_buf, data->d_size);
1823 [ + - - + ]: 8 : if (res < 0 || (size_t) res != data->d_size) {
1824 [ # # ]: 0 : close (fd);
1825 : 0 : unlink (tmppath);
1826 [ # # # # ]: 0 : throw libc_exception (errno, "cannot write to temporary file");
1827 : : }
1828 : :
1829 : : /* Set mtime to be the same as the parent file's mtime. */
1830 : 8 : struct timespec tvs[2];
1831 [ - + ]: 8 : if (fstat (elf_fd, &fs) != 0) {
1832 [ # # ]: 0 : close (fd);
1833 : 0 : unlink (tmppath);
1834 [ # # # # ]: 0 : throw libc_exception (errno, "cannot fstat file");
1835 : : }
1836 : :
1837 : 8 : tvs[0].tv_sec = 0;
1838 : 8 : tvs[0].tv_nsec = UTIME_OMIT;
1839 : 8 : tvs[1] = fs.st_mtim;
1840 : 8 : (void) futimens (fd, tvs);
1841 : :
1842 : 8 : struct timespec extract_end;
1843 : 8 : clock_gettime (CLOCK_MONOTONIC, &extract_end);
1844 : 8 : double extract_time = (extract_end.tv_sec - extract_begin.tv_sec)
1845 : 8 : + (extract_end.tv_nsec - extract_begin.tv_nsec)/1.e9;
1846 : :
1847 : : /* Add to fdcache. */
1848 [ + - + - ]: 8 : fdcache.intern (b_source, section, tmppath, data->d_size, true, extract_time);
1849 : 8 : break;
1850 : 12 : }
1851 : 210 : }
1852 : : }
1853 [ - - ]: 0 : catch (const reportable_exception &e)
1854 : : {
1855 [ - - ]: 0 : e.report (clog);
1856 [ - - ]: 0 : close (fd);
1857 : 0 : fd = -1;
1858 : 0 : }
1859 : :
1860 : 12 : elf_end (elf);
1861 : : return fd;
1862 : : }
1863 : :
1864 : : static struct MHD_Response*
1865 : 74 : handle_buildid_f_match (bool internal_req_t,
1866 : : int64_t b_mtime,
1867 : : const string& b_source0,
1868 : : const string& section,
1869 : : int *result_fd)
1870 : : {
1871 : 74 : (void) internal_req_t; // ignored
1872 : :
1873 : 74 : struct timespec extract_begin;
1874 : 74 : clock_gettime (CLOCK_MONOTONIC, &extract_begin);
1875 : :
1876 : 74 : int fd = open(b_source0.c_str(), O_RDONLY);
1877 [ - + ]: 74 : if (fd < 0)
1878 [ # # # # : 0 : throw libc_exception (errno, string("open ") + b_source0);
# # # # ]
1879 : :
1880 : : // NB: use manual close(2) in error case instead of defer_dtor, because
1881 : : // in the normal case, we want to hand the fd over to libmicrohttpd for
1882 : : // file transfer.
1883 : :
1884 : 74 : struct stat s;
1885 : 74 : int rc = fstat(fd, &s);
1886 [ - + ]: 74 : if (rc < 0)
1887 : : {
1888 : 0 : close(fd);
1889 [ # # # # : 0 : throw libc_exception (errno, string("fstat ") + b_source0);
# # # # ]
1890 : : }
1891 : :
1892 [ - + ]: 74 : if ((int64_t) s.st_mtime != b_mtime)
1893 : : {
1894 [ # # ]: 0 : if (verbose)
1895 [ # # # # ]: 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
1896 : 0 : close(fd);
1897 : 0 : return 0;
1898 : : }
1899 : :
1900 [ + + ]: 74 : if (!section.empty ())
1901 : : {
1902 : 6 : int scn_fd = extract_section (fd, s.st_mtime, b_source0, section, extract_begin);
1903 : 6 : close (fd);
1904 : :
1905 [ + + ]: 6 : if (scn_fd >= 0)
1906 : 4 : fd = scn_fd;
1907 : : else
1908 : : {
1909 [ + - ]: 2 : if (verbose)
1910 [ + - ]: 6 : obatched (clog) << "cannot find section " << section
1911 [ + - + - : 2 : << " for " << b_source0 << endl;
+ - ]
1912 : 2 : return 0;
1913 : : }
1914 : :
1915 : 4 : rc = fstat(fd, &s);
1916 [ - + ]: 4 : if (rc < 0)
1917 : : {
1918 : 0 : close (fd);
1919 [ # # # # : 0 : throw libc_exception (errno, string ("fstat ") + b_source0
# # # # #
# # # #
# ]
1920 [ # # # # : 0 : + string (" ") + section);
# # # # #
# ]
1921 : : }
1922 : : }
1923 : :
1924 : 72 : struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
1925 [ + - + - : 144 : inc_metric ("http_responses_total","result","file");
+ - - + -
+ - - -
- ]
1926 [ - + ]: 72 : if (r == 0)
1927 : : {
1928 [ # # ]: 0 : if (verbose)
1929 [ # # ]: 0 : obatched(clog) << "cannot create fd-response for " << b_source0
1930 [ # # # # : 0 : << " section=" << section << endl;
# # ]
1931 : 0 : close(fd);
1932 : : }
1933 : : else
1934 : : {
1935 : 72 : add_mhd_response_header (r, "Content-Type", "application/octet-stream");
1936 [ + - ]: 72 : add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
1937 : 72 : to_string(s.st_size).c_str());
1938 : 72 : add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source0.c_str());
1939 : 72 : add_mhd_last_modified (r, s.st_mtime);
1940 [ + - ]: 72 : if (verbose > 1)
1941 [ + - + - : 144 : obatched(clog) << "serving file " << b_source0 << " section=" << section << endl;
+ - + - ]
1942 : : /* libmicrohttpd will close it. */
1943 [ - + ]: 72 : if (result_fd)
1944 : 72 : *result_fd = fd;
1945 : : }
1946 : :
1947 : : return r;
1948 : : }
1949 : :
1950 : : // For security/portability reasons, many distro-package archives have
1951 : : // a "./" in front of path names; others have nothing, others have
1952 : : // "/". Canonicalize them all to a single leading "/", with the
1953 : : // assumption that this matches the dwarf-derived file names too.
1954 : 1002 : string canonicalized_archive_entry_pathname(struct archive_entry *e)
1955 : : {
1956 : 1002 : string fn = archive_entry_pathname(e);
1957 [ - + ]: 1002 : if (fn.size() == 0)
1958 : 0 : return fn;
1959 [ - + ]: 1002 : if (fn[0] == '/')
1960 : 0 : return fn;
1961 [ + + ]: 1002 : if (fn[0] == '.')
1962 [ + - ]: 800 : return fn.substr(1);
1963 : : else
1964 [ + - + - : 404 : return string("/")+fn;
- - ]
1965 : 1002 : }
1966 : :
1967 : :
1968 : :
1969 : : static struct MHD_Response*
1970 : 790 : handle_buildid_r_match (bool internal_req_p,
1971 : : int64_t b_mtime,
1972 : : const string& b_source0,
1973 : : const string& b_source1,
1974 : : const string& section,
1975 : : int *result_fd)
1976 : : {
1977 : 790 : struct timespec extract_begin;
1978 : 790 : clock_gettime (CLOCK_MONOTONIC, &extract_begin);
1979 : :
1980 : 790 : struct stat fs;
1981 : 790 : int rc = stat (b_source0.c_str(), &fs);
1982 [ + + ]: 790 : if (rc != 0)
1983 [ + - + - : 116 : throw libc_exception (errno, string("stat ") + b_source0);
+ - - + ]
1984 : :
1985 [ - + ]: 732 : if ((int64_t) fs.st_mtime != b_mtime)
1986 : : {
1987 [ # # ]: 0 : if (verbose)
1988 [ # # # # ]: 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
1989 : 0 : return 0;
1990 : : }
1991 : :
1992 : : // Extract the IMA per-file signature (if it exists)
1993 : 732 : string ima_sig = "";
1994 : : #ifdef ENABLE_IMA_VERIFICATION
1995 : : do
1996 : : {
1997 : : FD_t rpm_fd;
1998 : : if(!(rpm_fd = Fopen(b_source0.c_str(), "r.ufdio"))) // read, uncompressed, rpm/rpmio.h
1999 : : {
2000 : : if (verbose) obatched(clog) << "There was an error while opening " << b_source0 << endl;
2001 : : break; // Exit IMA extraction
2002 : : }
2003 : :
2004 : : Header rpm_hdr;
2005 : : if(RPMRC_FAIL == rpmReadPackageFile(NULL, rpm_fd, b_source0.c_str(), &rpm_hdr))
2006 : : {
2007 : : if (verbose) obatched(clog) << "There was an error while reading the header of " << b_source0 << endl;
2008 : : Fclose(rpm_fd);
2009 : : break; // Exit IMA extraction
2010 : : }
2011 : :
2012 : : // Fill sig_tag_data with an alloc'd copy of the array of IMA signatures (if they exist)
2013 : : struct rpmtd_s sig_tag_data;
2014 : : rpmtdReset(&sig_tag_data);
2015 : : do{ /* A do-while so we can break out of the koji sigcache checking on failure */
2016 : : if(requires_koji_sigcache_mapping)
2017 : : {
2018 : : /* NB: Koji builds result in a directory structure like the following
2019 : : - PACKAGE/VERSION/RELEASE
2020 : : - ARCH1
2021 : : - foo.rpm // The rpm known by debuginfod
2022 : : - ...
2023 : : - ARCHN
2024 : : - data
2025 : : - signed // Periodically purged (and not scanned by debuginfod)
2026 : : - sigcache
2027 : : - ARCH1
2028 : : - foo.rpm.sig // An empty rpm header
2029 : : - ...
2030 : : - ARCHN
2031 : : - PACKAGE_KEYID1
2032 : : - ARCH1
2033 : : - foo.rpm.sig // The header of the signed rpm. This is the file we need to extract the IMA signatures
2034 : : - ...
2035 : : - ARCHN
2036 : : - ...
2037 : : - PACKAGE_KEYIDn
2038 : :
2039 : : We therefore need to do a mapping:
2040 : :
2041 : : P/V/R/A/N-V-R.A.rpm ->
2042 : : P/V/R/data/sigcache/KEYID/A/N-V-R.A.rpm.sig
2043 : :
2044 : : There are 2 key insights here
2045 : :
2046 : : 1. We need to go 2 directories down from sigcache to get to the
2047 : : rpm header. So to distinguish ARCH1/foo.rpm.sig and
2048 : : PACKAGE_KEYID1/ARCH1/foo.rpm.sig we can look 2 directories down
2049 : :
2050 : : 2. It's safe to assume that the user will have all of the
2051 : : required verification certs. So we can pick from any of the
2052 : : PACKAGE_KEYID* directories. For simplicity we choose first we
2053 : : match against
2054 : :
2055 : : See: https://pagure.io/koji/issue/3670
2056 : : */
2057 : :
2058 : : // Do the mapping from b_source0 to the koji path for the signed rpm header
2059 : : string signed_rpm_path = b_source0;
2060 : : size_t insert_pos = string::npos;
2061 : : for(int i = 0; i < 2; i++) insert_pos = signed_rpm_path.rfind("/", insert_pos) - 1;
2062 : : string globbed_path = signed_rpm_path.insert(insert_pos + 1, "/data/sigcache/*").append(".sig"); // The globbed path we're seeking
2063 : : glob_t pglob;
2064 : : int grc;
2065 : : if(0 != (grc = glob(globbed_path.c_str(), GLOB_NOSORT, NULL, &pglob)))
2066 : : {
2067 : : // Break out, but only report real errors
2068 : : if (verbose && grc != GLOB_NOMATCH) obatched(clog) << "There was an error (" << strerror(errno) << ") globbing " << globbed_path << endl;
2069 : : break; // Exit koji sigcache check
2070 : : }
2071 : : signed_rpm_path = pglob.gl_pathv[0]; // See insight 2 above
2072 : : globfree(&pglob);
2073 : :
2074 : : if (verbose > 2) obatched(clog) << "attempting IMA signature extraction from koji header " << signed_rpm_path << endl;
2075 : :
2076 : : FD_t sig_rpm_fd;
2077 : : if(NULL == (sig_rpm_fd = Fopen(signed_rpm_path.c_str(), "r")))
2078 : : {
2079 : : if (verbose) obatched(clog) << "There was an error while opening " << signed_rpm_path << endl;
2080 : : break; // Exit koji sigcache check
2081 : : }
2082 : :
2083 : : Header sig_hdr = headerRead(sig_rpm_fd, HEADER_MAGIC_YES /* Validate magic too */ );
2084 : : if (!sig_hdr || 1 != headerGet(sig_hdr, RPMSIGTAG_FILESIGNATURES, &sig_tag_data, HEADERGET_ALLOC))
2085 : : {
2086 : : if (verbose) obatched(clog) << "Unable to extract RPMSIGTAG_FILESIGNATURES from " << signed_rpm_path << endl;
2087 : : }
2088 : : headerFree(sig_hdr); // We can free here since sig_tag_data has an alloc'd copy of the data
2089 : : Fclose(sig_rpm_fd);
2090 : : }
2091 : : }while(false);
2092 : :
2093 : : if(0 == sig_tag_data.count)
2094 : : {
2095 : : // In the general case (or a fallback from the koji sigcache mapping not finding signatures)
2096 : : // we can just (try) extract the signatures from the rpm header
2097 : : if (1 != headerGet(rpm_hdr, RPMTAG_FILESIGNATURES, &sig_tag_data, HEADERGET_ALLOC))
2098 : : {
2099 : : if (verbose) obatched(clog) << "Unable to extract RPMTAG_FILESIGNATURES from " << b_source0 << endl;
2100 : : }
2101 : : }
2102 : : // Search the array for the signature coresponding to b_source1
2103 : : int idx = -1;
2104 : : char *sig = NULL;
2105 : : rpmfi hdr_fi = rpmfiNew(NULL, rpm_hdr, RPMTAG_BASENAMES, RPMFI_FLAGS_QUERY);
2106 : : do
2107 : : {
2108 : : sig = (char*)rpmtdNextString(&sig_tag_data);
2109 : : idx = rpmfiNext(hdr_fi);
2110 : : }
2111 : : while (idx != -1 && 0 != strcmp(b_source1.c_str(), rpmfiFN(hdr_fi)));
2112 : : rpmfiFree(hdr_fi);
2113 : :
2114 : : if(sig && 0 != strlen(sig) && idx != -1)
2115 : : {
2116 : : if (verbose > 2) obatched(clog) << "Found IMA signature for " << b_source1 << ":\n" << sig << endl;
2117 : : ima_sig = sig;
2118 : : inc_metric("http_responses_total","extra","ima-sigs-extracted");
2119 : : }
2120 : : else
2121 : : {
2122 : : if (verbose > 2) obatched(clog) << "Could not find IMA signature for " << b_source1 << endl;
2123 : : }
2124 : :
2125 : : rpmtdFreeData (&sig_tag_data);
2126 : : headerFree(rpm_hdr);
2127 : : Fclose(rpm_fd);
2128 : : } while(false);
2129 : : #endif
2130 : :
2131 : : // check for a match in the fdcache first
2132 [ + - ]: 732 : int fd = fdcache.lookup(b_source0, b_source1);
2133 [ + + ]: 732 : while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
2134 : : {
2135 : 578 : rc = fstat(fd, &fs);
2136 [ - + ]: 578 : if (rc < 0) // disappeared?
2137 : : {
2138 [ # # ]: 0 : if (verbose)
2139 [ # # # # : 0 : obatched(clog) << "cannot fstat fdcache " << b_source0 << endl;
# # ]
2140 [ # # ]: 0 : close(fd);
2141 [ # # ]: 0 : fdcache.clear(b_source0, b_source1);
2142 : : break; // branch out of if "loop", to try new libarchive fetch attempt
2143 : : }
2144 : :
2145 [ + + ]: 578 : if (!section.empty ())
2146 : : {
2147 [ + - + - ]: 2 : int scn_fd = extract_section (fd, fs.st_mtime,
2148 [ + - + - : 4 : b_source0 + ":" + b_source1,
- + ]
2149 : : section, extract_begin);
2150 [ + - ]: 2 : close (fd);
2151 [ - + ]: 2 : if (scn_fd >= 0)
2152 : 0 : fd = scn_fd;
2153 : : else
2154 : : {
2155 [ + - ]: 2 : if (verbose)
2156 [ + - + - ]: 6 : obatched (clog) << "cannot find section " << section
2157 : : << " for archive " << b_source0
2158 [ + - + - : 2 : << " file " << b_source1 << endl;
+ - + - +
- ]
2159 : 2 : return 0;
2160 : : }
2161 : :
2162 : 0 : rc = fstat(fd, &fs);
2163 [ # # ]: 0 : if (rc < 0)
2164 : : {
2165 [ # # ]: 0 : close (fd);
2166 [ # # # # ]: 0 : throw libc_exception (errno,
2167 [ # # # # : 0 : string ("fstat archive ") + b_source0 + string (" file ") + b_source1
# # # # #
# # # # #
# # # # #
# # # #
# ]
2168 [ # # # # : 0 : + string (" section ") + section);
# # # # #
# ]
2169 : : }
2170 : : }
2171 : :
2172 [ + - ]: 576 : struct MHD_Response* r = MHD_create_response_from_fd (fs.st_size, fd);
2173 [ - + ]: 576 : if (r == 0)
2174 : : {
2175 [ # # ]: 0 : if (verbose)
2176 [ # # # # : 0 : obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
# # ]
2177 [ # # ]: 0 : close(fd);
2178 : : break; // branch out of if "loop", to try new libarchive fetch attempt
2179 : : }
2180 : :
2181 [ + - + - : 1152 : inc_metric ("http_responses_total","result","archive fdcache");
+ - + - -
+ - + - -
- - ]
2182 : :
2183 [ + - ]: 576 : add_mhd_response_header (r, "Content-Type", "application/octet-stream");
2184 [ + - ]: 576 : add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
2185 [ + - ]: 576 : to_string(fs.st_size).c_str());
2186 [ + - ]: 576 : add_mhd_response_header (r, "X-DEBUGINFOD-ARCHIVE", b_source0.c_str());
2187 [ + - ]: 576 : add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source1.c_str());
2188 [ - + - - ]: 576 : if(!ima_sig.empty()) add_mhd_response_header(r, "X-DEBUGINFOD-IMASIGNATURE", ima_sig.c_str());
2189 [ + - ]: 576 : add_mhd_last_modified (r, fs.st_mtime);
2190 [ + - ]: 576 : if (verbose > 1)
2191 [ + - + - ]: 1728 : obatched(clog) << "serving fdcache archive " << b_source0
2192 : : << " file " << b_source1
2193 : : << " section=" << section
2194 [ + - + - : 576 : << " IMA signature=" << ima_sig << endl;
+ - + - +
- + - +
- ]
2195 : : /* libmicrohttpd will close it. */
2196 [ + - ]: 576 : if (result_fd)
2197 : 576 : *result_fd = fd;
2198 : : return r;
2199 : : // NB: see, we never go around the 'loop' more than once
2200 : : }
2201 : :
2202 : : // no match ... grumble, must process the archive
2203 [ + - - - ]: 154 : string archive_decoder = "/dev/null";
2204 [ + - - - ]: 154 : string archive_extension = "";
2205 [ + + ]: 346 : for (auto&& arch : scan_archives)
2206 [ + + ]: 192 : if (string_endswith(b_source0, arch.first))
2207 : : {
2208 [ + - ]: 154 : archive_extension = arch.first;
2209 [ + - ]: 346 : archive_decoder = arch.second;
2210 : : }
2211 : 154 : FILE* fp;
2212 : :
2213 : 154 : defer_dtor<FILE*,int>::dtor_fn dfn;
2214 [ + + ]: 154 : if (archive_decoder != "cat")
2215 : : {
2216 [ + - + - : 48 : string popen_cmd = archive_decoder + " " + shell_escape(b_source0);
+ - - + -
- - - ]
2217 [ + - ]: 24 : fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
2218 : 24 : dfn = pclose;
2219 [ - + ]: 24 : if (fp == NULL)
2220 [ # # # # : 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
# # # # ]
2221 : 24 : }
2222 : : else
2223 : : {
2224 [ + - ]: 130 : fp = fopen (b_source0.c_str(), "r");
2225 : 130 : dfn = fclose;
2226 [ - + ]: 130 : if (fp == NULL)
2227 [ # # # # : 0 : throw libc_exception (errno, string("fopen ") + b_source0);
# # # # ]
2228 : : }
2229 : 154 : defer_dtor<FILE*,int> fp_closer (fp, dfn);
2230 : :
2231 : 154 : struct archive *a;
2232 [ + - ]: 154 : a = archive_read_new();
2233 [ - + ]: 154 : if (a == NULL)
2234 [ # # # # ]: 0 : throw archive_exception("cannot create archive reader");
2235 : 154 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
2236 : :
2237 [ + - ]: 154 : rc = archive_read_support_format_all(a);
2238 [ - + ]: 154 : if (rc != ARCHIVE_OK)
2239 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all format");
2240 [ + - ]: 154 : rc = archive_read_support_filter_all(a);
2241 [ - + ]: 154 : if (rc != ARCHIVE_OK)
2242 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all filters");
2243 : :
2244 [ + - ]: 154 : rc = archive_read_open_FILE (a, fp);
2245 [ - + ]: 154 : if (rc != ARCHIVE_OK)
2246 : : {
2247 [ # # # # : 0 : obatched(clog) << "cannot open archive from pipe " << b_source0 << endl;
# # ]
2248 [ # # # # ]: 0 : throw archive_exception(a, "cannot open archive from pipe");
2249 : : }
2250 : :
2251 : : // archive traversal is in three stages, no, four stages:
2252 : : // 1) skip entries whose names do not match the requested one
2253 : : // 2) extract the matching entry name (set r = result)
2254 : : // 3) extract some number of prefetched entries (just into fdcache)
2255 : : // 4) abort any further processing
2256 : 154 : struct MHD_Response* r = 0; // will set in stage 2
2257 [ + + ]: 154 : unsigned prefetch_count =
2258 : : internal_req_p ? 0 : fdcache_prefetch; // will decrement in stage 3
2259 : :
2260 [ + + ]: 1562 : while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
2261 : : {
2262 [ + - ]: 1540 : if (interrupted)
2263 : : break;
2264 : :
2265 : 1540 : struct archive_entry *e;
2266 [ + - ]: 1540 : rc = archive_read_next_header (a, &e);
2267 [ + + ]: 1540 : if (rc != ARCHIVE_OK)
2268 : : break;
2269 : :
2270 [ + - + + ]: 1408 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
2271 : 1106 : continue;
2272 : :
2273 [ + - ]: 302 : string fn = canonicalized_archive_entry_pathname (e);
2274 [ + + + + ]: 302 : if ((r == 0) && (fn != b_source1)) // stage 1
2275 : 76 : continue;
2276 : :
2277 [ + - + + ]: 226 : if (fdcache.probe (b_source0, fn) && // skip if already interned
2278 [ - + ]: 24 : fn != b_source1) // but only if we'd just be prefetching, PR29474
2279 : 24 : continue;
2280 : :
2281 : : // extract this file to a temporary file
2282 : 202 : char* tmppath = NULL;
2283 : 202 : rc = asprintf (&tmppath, "%s/debuginfod-fdcache.XXXXXX", tmpdir.c_str());
2284 [ - + ]: 202 : if (rc < 0)
2285 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
2286 : 202 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
2287 [ + - ]: 202 : fd = mkstemp (tmppath);
2288 [ - + ]: 202 : if (fd < 0)
2289 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
2290 : : // NB: don't unlink (tmppath), as fdcache will take charge of it.
2291 : :
2292 : : // NB: this can take many uninterruptible seconds for a huge file
2293 [ + - ]: 202 : rc = archive_read_data_into_fd (a, fd);
2294 [ - + ]: 202 : if (rc != ARCHIVE_OK) // e.g. ENOSPC!
2295 : : {
2296 [ # # ]: 0 : close (fd);
2297 : 0 : unlink (tmppath);
2298 [ # # # # ]: 0 : throw archive_exception(a, "cannot extract file");
2299 : : }
2300 : :
2301 : : // Set the mtime so the fdcache file mtimes, even prefetched ones,
2302 : : // propagate to future webapi clients.
2303 : 202 : struct timespec tvs[2];
2304 : 202 : tvs[0].tv_sec = 0;
2305 : 202 : tvs[0].tv_nsec = UTIME_OMIT;
2306 [ + - ]: 202 : tvs[1].tv_sec = archive_entry_mtime(e);
2307 [ + - ]: 202 : tvs[1].tv_nsec = archive_entry_mtime_nsec(e);
2308 : 202 : (void) futimens (fd, tvs); /* best effort */
2309 : :
2310 : 202 : struct timespec extract_end;
2311 : 202 : clock_gettime (CLOCK_MONOTONIC, &extract_end);
2312 : 202 : double extract_time = (extract_end.tv_sec - extract_begin.tv_sec)
2313 : 202 : + (extract_end.tv_nsec - extract_begin.tv_nsec)/1.e9;
2314 : :
2315 [ + + ]: 202 : if (r != 0) // stage 3
2316 : : {
2317 : : // NB: now we know we have a complete reusable file; make fdcache
2318 : : // responsible for unlinking it later.
2319 [ + - + - : 48 : fdcache.intern(b_source0, fn,
+ - ]
2320 : : tmppath, archive_entry_size(e),
2321 : : false, extract_time); // prefetched ones go to the prefetch cache
2322 : 48 : prefetch_count --;
2323 [ + - ]: 48 : close (fd); // we're not saving this fd to make a mhd-response from!
2324 : 48 : continue;
2325 : : }
2326 : :
2327 : : // NB: now we know we have a complete reusable file; make fdcache
2328 : : // responsible for unlinking it later.
2329 [ + - + - : 154 : fdcache.intern(b_source0, b_source1,
+ - ]
2330 : : tmppath, archive_entry_size(e),
2331 : : true, extract_time); // requested ones go to the front of the line
2332 : :
2333 [ + + ]: 154 : if (!section.empty ())
2334 : : {
2335 [ + - + - ]: 4 : int scn_fd = extract_section (fd, b_mtime,
2336 [ + - + - : 8 : b_source0 + ":" + b_source1,
- + ]
2337 : : section, extract_begin);
2338 [ + - ]: 4 : close (fd);
2339 [ + - ]: 4 : if (scn_fd >= 0)
2340 : 4 : fd = scn_fd;
2341 : : else
2342 : : {
2343 [ # # ]: 0 : if (verbose)
2344 [ # # # # ]: 0 : obatched (clog) << "cannot find section " << section
2345 : : << " for archive " << b_source0
2346 [ # # # # : 0 : << " file " << b_source1 << endl;
# # # # #
# ]
2347 : 0 : return 0;
2348 : : }
2349 : :
2350 : 4 : rc = fstat(fd, &fs);
2351 [ - + ]: 4 : if (rc < 0)
2352 : : {
2353 [ # # ]: 0 : close (fd);
2354 [ # # # # ]: 0 : throw libc_exception (errno,
2355 [ # # # # : 0 : string ("fstat ") + b_source0 + string (" ") + section);
# # # # #
# # # # #
# # # # #
# ]
2356 : : }
2357 [ + - ]: 4 : r = MHD_create_response_from_fd (fs.st_size, fd);
2358 : : }
2359 : : else
2360 [ + - + - ]: 150 : r = MHD_create_response_from_fd (archive_entry_size(e), fd);
2361 : :
2362 [ + - + - : 308 : inc_metric ("http_responses_total","result",archive_extension + " archive");
+ - + - -
+ + + - -
- - ]
2363 [ - + ]: 154 : if (r == 0)
2364 : : {
2365 [ # # ]: 0 : if (verbose)
2366 [ # # # # : 0 : obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
# # ]
2367 [ # # ]: 0 : close(fd);
2368 : 0 : break; // assume no chance of better luck around another iteration; no other copies of same file
2369 : : }
2370 : : else
2371 : : {
2372 [ + - ]: 154 : add_mhd_response_header (r, "Content-Type",
2373 : : "application/octet-stream");
2374 [ + - ]: 154 : add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
2375 [ + - + - ]: 154 : to_string(archive_entry_size(e)).c_str());
2376 [ + - ]: 154 : add_mhd_response_header (r, "X-DEBUGINFOD-ARCHIVE", b_source0.c_str());
2377 [ + - ]: 154 : add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source1.c_str());
2378 [ - + - - ]: 154 : if(!ima_sig.empty()) add_mhd_response_header(r, "X-DEBUGINFOD-IMASIGNATURE", ima_sig.c_str());
2379 [ + - + - ]: 154 : add_mhd_last_modified (r, archive_entry_mtime(e));
2380 [ + - ]: 154 : if (verbose > 1)
2381 [ + - + - ]: 462 : obatched(clog) << "serving archive " << b_source0
2382 : : << " file " << b_source1
2383 : : << " section=" << section
2384 [ + - + - : 154 : << " IMA signature=" << ima_sig << endl;
+ - + - +
- + - +
- ]
2385 : : /* libmicrohttpd will close it. */
2386 [ + - ]: 154 : if (result_fd)
2387 : 154 : *result_fd = fd;
2388 : 154 : continue;
2389 : : }
2390 [ - - - - : 1842 : }
+ + ]
2391 : :
2392 : : // XXX: rpm/file not found: delete this R entry?
2393 : : return r;
2394 [ - + + + : 1618 : }
- + ]
2395 : :
2396 : : void
2397 : 646 : add_client_federation_headers(debuginfod_client *client, MHD_Connection* conn){
2398 : : // Transcribe incoming User-Agent:
2399 [ - + ]: 646 : string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
2400 [ + - + - : 650 : string ua_complete = string("User-Agent: ") + ua;
+ - ]
2401 [ + - ]: 646 : debuginfod_add_http_header (client, ua_complete.c_str());
2402 : :
2403 : : // Compute larger XFF:, for avoiding info loss during
2404 : : // federation, and for future cyclicity detection.
2405 [ + - + + : 1270 : string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
+ - + - ]
2406 [ + + ]: 646 : if (xff != "")
2407 [ + - - + ]: 52 : xff += string(", "); // comma separated list
2408 : :
2409 : 646 : unsigned int xff_count = 0;
2410 [ + + ]: 974 : for (auto&& i : xff){
2411 [ + + ]: 328 : if (i == ',') xff_count++;
2412 : : }
2413 : :
2414 : : // if X-Forwarded-For: exceeds N hops,
2415 : : // do not delegate a local lookup miss to upstream debuginfods.
2416 [ + + ]: 646 : if (xff_count >= forwarded_ttl_limit)
2417 [ + - + - ]: 8 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
2418 : 8 : and will not query the upstream servers");
2419 : :
2420 : : // Compute the client's numeric IP address only - so can't merge with conninfo()
2421 [ + - ]: 642 : const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
2422 : : MHD_CONNECTION_INFO_CLIENT_ADDRESS);
2423 [ + - ]: 642 : struct sockaddr *so = u ? u->client_addr : 0;
2424 : 642 : char hostname[256] = ""; // RFC1035
2425 [ + - - + ]: 642 : if (so && so->sa_family == AF_INET) {
2426 [ # # ]: 0 : (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
2427 : : NI_NUMERICHOST);
2428 [ + - ]: 642 : } else if (so && so->sa_family == AF_INET6) {
2429 : 642 : struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
2430 [ + - + - : 642 : if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
- + ]
2431 : 642 : struct sockaddr_in addr4;
2432 [ + - ]: 642 : memset (&addr4, 0, sizeof(addr4));
2433 : 642 : addr4.sin_family = AF_INET;
2434 : 642 : addr4.sin_port = addr6->sin6_port;
2435 [ + - ]: 642 : memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
2436 [ + - ]: 642 : (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
2437 : : hostname, sizeof (hostname), NULL, 0,
2438 : : NI_NUMERICHOST);
2439 : : } else {
2440 [ # # ]: 0 : (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
2441 : : NI_NUMERICHOST);
2442 : : }
2443 : : }
2444 : :
2445 [ + - + - : 1288 : string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
+ - + - -
+ - + - -
- + ]
2446 [ + - ]: 642 : debuginfod_add_http_header (client, xff_complete.c_str());
2447 [ + + + - : 1360 : }
+ + ]
2448 : :
2449 : : static struct MHD_Response*
2450 : 864 : handle_buildid_match (bool internal_req_p,
2451 : : int64_t b_mtime,
2452 : : const string& b_stype,
2453 : : const string& b_source0,
2454 : : const string& b_source1,
2455 : : const string& section,
2456 : : int *result_fd)
2457 : : {
2458 : 864 : try
2459 : : {
2460 [ + + ]: 864 : if (b_stype == "F")
2461 [ + - ]: 74 : return handle_buildid_f_match(internal_req_p, b_mtime, b_source0,
2462 : : section, result_fd);
2463 [ + - ]: 790 : else if (b_stype == "R")
2464 [ + + ]: 790 : return handle_buildid_r_match(internal_req_p, b_mtime, b_source0,
2465 : : b_source1, section, result_fd);
2466 : : }
2467 [ - + ]: 58 : catch (const reportable_exception &e)
2468 : : {
2469 [ + - ]: 58 : e.report(clog);
2470 : : // Report but swallow libc etc. errors here; let the caller
2471 : : // iterate to other matches of the content.
2472 : 58 : }
2473 : :
2474 : : return 0;
2475 : : }
2476 : :
2477 : :
2478 : : static int
2479 : 4 : debuginfod_find_progress (debuginfod_client *, long a, long b)
2480 : : {
2481 [ - + ]: 4 : if (verbose > 4)
2482 [ # # # # : 0 : obatched(clog) << "federated debuginfod progress=" << a << "/" << b << endl;
# # # # ]
2483 : :
2484 : 4 : return interrupted;
2485 : : }
2486 : :
2487 : :
2488 : : // a little lru pool of debuginfod_client*s for reuse between query threads
2489 : :
2490 : : mutex dc_pool_lock;
2491 : : deque<debuginfod_client*> dc_pool;
2492 : :
2493 : 646 : debuginfod_client* debuginfod_pool_begin()
2494 : : {
2495 : 646 : unique_lock<mutex> lock(dc_pool_lock);
2496 [ + + ]: 646 : if (dc_pool.size() > 0)
2497 : : {
2498 [ + - + - : 1232 : inc_metric("dc_pool_op_count","op","begin-reuse");
+ - + - -
+ - + - -
- - ]
2499 : 616 : debuginfod_client *c = dc_pool.front();
2500 : 616 : dc_pool.pop_front();
2501 : 616 : return c;
2502 : : }
2503 [ + - + - : 60 : inc_metric("dc_pool_op_count","op","begin-new");
+ - + - -
+ - + - -
- - ]
2504 [ + - ]: 30 : return debuginfod_begin();
2505 : 646 : }
2506 : :
2507 : :
2508 : 150 : void debuginfod_pool_groom()
2509 : : {
2510 : 150 : unique_lock<mutex> lock(dc_pool_lock);
2511 [ + + ]: 180 : while (dc_pool.size() > 0)
2512 : : {
2513 [ + - + - : 60 : inc_metric("dc_pool_op_count","op","end");
+ - + - -
+ - + - -
- - ]
2514 [ + - ]: 30 : debuginfod_end(dc_pool.front());
2515 : 30 : dc_pool.pop_front();
2516 : : }
2517 : 150 : }
2518 : :
2519 : :
2520 : 646 : void debuginfod_pool_end(debuginfod_client* c)
2521 : : {
2522 : 646 : unique_lock<mutex> lock(dc_pool_lock);
2523 [ + - + - : 1292 : inc_metric("dc_pool_op_count","op","end-save");
+ - + - -
+ - + - -
- - ]
2524 [ + - ]: 646 : dc_pool.push_front(c); // accelerate reuse, vs. push_back
2525 : 646 : }
2526 : :
2527 : :
2528 : : static struct MHD_Response*
2529 : 1432 : handle_buildid (MHD_Connection* conn,
2530 : : const string& buildid /* unsafe */,
2531 : : string& artifacttype /* unsafe, cleanse on exception/return */,
2532 : : const string& suffix /* unsafe */,
2533 : : int *result_fd)
2534 : : {
2535 : : // validate artifacttype
2536 : 1432 : string atype_code;
2537 [ + + + - ]: 1432 : if (artifacttype == "debuginfo") atype_code = "D";
2538 [ + + + - ]: 690 : else if (artifacttype == "executable") atype_code = "E";
2539 [ + + + - ]: 68 : else if (artifacttype == "source") atype_code = "S";
2540 [ + + + - ]: 12 : else if (artifacttype == "section") atype_code = "I";
2541 : : else {
2542 [ + - ]: 4 : artifacttype = "invalid"; // PR28242 ensure http_resposes metrics don't propagate unclean user data
2543 [ + - + - ]: 12 : throw reportable_exception("invalid artifacttype");
2544 : : }
2545 : :
2546 [ + + ]: 1428 : if (conn != 0)
2547 [ + - + - : 3394 : inc_metric("http_requests_total", "type", artifacttype);
+ - - + -
- - + ]
2548 : :
2549 : 1428 : string section;
2550 [ + + ]: 1428 : if (atype_code == "I")
2551 : : {
2552 [ - + ]: 8 : if (suffix.size () < 2)
2553 [ # # # # ]: 0 : throw reportable_exception ("invalid section suffix");
2554 : :
2555 : : // Remove leading '/'
2556 [ + - - + ]: 8 : section = suffix.substr(1);
2557 : : }
2558 : :
2559 [ + + - + ]: 1484 : if (atype_code == "S" && suffix == "")
2560 [ # # # # ]: 0 : throw reportable_exception("invalid source suffix");
2561 : :
2562 : : // validate buildid
2563 [ + + ]: 1428 : if ((buildid.size() < 2) || // not empty
2564 [ + + + - : 2850 : (buildid.size() % 2) || // even number
+ - ]
2565 : 1422 : (buildid.find_first_not_of("0123456789abcdef") != string::npos)) // pure tasty lowercase hex
2566 [ + - - + ]: 12 : throw reportable_exception("invalid buildid");
2567 : :
2568 [ + - ]: 1422 : if (verbose > 1)
2569 [ + - + - ]: 4266 : obatched(clog) << "searching for buildid=" << buildid << " artifacttype=" << artifacttype
2570 [ + - + - : 1422 : << " suffix=" << suffix << endl;
+ - + - +
- ]
2571 : :
2572 : : // If invoked from the scanner threads, use the scanners' read-write
2573 : : // connection. Otherwise use the web query threads' read-only connection.
2574 [ + + ]: 1422 : sqlite3 *thisdb = (conn == 0) ? db : dbq;
2575 : :
2576 : 1422 : sqlite_ps *pp = 0;
2577 : :
2578 [ + + ]: 1422 : if (atype_code == "D")
2579 : : {
2580 [ - + ]: 742 : pp = new sqlite_ps (thisdb, "mhd-query-d",
2581 : : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_d where buildid = ? "
2582 [ + - + - : 1484 : "order by mtime desc");
+ - + - +
- - - ]
2583 [ + - ]: 742 : pp->reset();
2584 [ + - ]: 742 : pp->bind(1, buildid);
2585 : : }
2586 [ + + ]: 680 : else if (atype_code == "E")
2587 : : {
2588 [ - + ]: 616 : pp = new sqlite_ps (thisdb, "mhd-query-e",
2589 : : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_e where buildid = ? "
2590 [ + - + - : 1232 : "order by mtime desc");
+ - + - +
- - - ]
2591 [ + - ]: 616 : pp->reset();
2592 [ + - ]: 616 : pp->bind(1, buildid);
2593 : : }
2594 [ + + ]: 64 : else if (atype_code == "S")
2595 : : {
2596 : : // PR25548
2597 : : // Incoming source queries may come in with either dwarf-level OR canonicalized paths.
2598 : : // We let the query pass with either one.
2599 : :
2600 [ - + ]: 56 : pp = new sqlite_ps (thisdb, "mhd-query-s",
2601 : : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc in (?,?) "
2602 [ + - + - : 112 : "order by sharedprefix(source0,source0ref) desc, mtime desc");
+ - + - +
- - - ]
2603 [ + - ]: 56 : pp->reset();
2604 [ + - ]: 56 : pp->bind(1, buildid);
2605 : : // NB: we don't store the non-canonicalized path names any more, but old databases
2606 : : // might have them (and no canon ones), so we keep searching for both.
2607 [ + - ]: 56 : pp->bind(2, suffix);
2608 [ + - + - : 734 : pp->bind(3, canon_pathname(suffix));
- + ]
2609 : : }
2610 [ + - ]: 8 : else if (atype_code == "I")
2611 : : {
2612 [ - + ]: 8 : pp = new sqlite_ps (thisdb, "mhd-query-i",
2613 : : "select mtime, sourcetype, source0, source1, 1 as debug_p from " BUILDIDS "_query_d where buildid = ? "
2614 : : "union all "
2615 : : "select mtime, sourcetype, source0, source1, 0 as debug_p from " BUILDIDS "_query_e where buildid = ? "
2616 [ + - + - : 16 : "order by debug_p desc, mtime desc");
+ - + - +
- - - ]
2617 [ + - ]: 8 : pp->reset();
2618 [ + - ]: 8 : pp->bind(1, buildid);
2619 [ + - ]: 8 : pp->bind(2, buildid);
2620 : : }
2621 : 1422 : unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
2622 : :
2623 : 1422 : bool do_upstream_section_query = true;
2624 : :
2625 : : // consume all the rows
2626 : 1484 : while (1)
2627 : : {
2628 [ + - ]: 1484 : int rc = pp->step();
2629 [ + + ]: 1484 : if (rc == SQLITE_DONE) break;
2630 [ - + ]: 864 : if (rc != SQLITE_ROW)
2631 [ # # # # ]: 0 : throw sqlite_exception(rc, "step");
2632 : :
2633 [ + - ]: 864 : int64_t b_mtime = sqlite3_column_int64 (*pp, 0);
2634 [ + - - + : 864 : string b_stype = string((const char*) sqlite3_column_text (*pp, 1) ?: ""); /* by DDL may not be NULL */
+ - ]
2635 [ + - - + : 864 : string b_source0 = string((const char*) sqlite3_column_text (*pp, 2) ?: ""); /* may be NULL */
+ - - - ]
2636 [ + - + + : 938 : string b_source1 = string((const char*) sqlite3_column_text (*pp, 3) ?: ""); /* may be NULL */
+ - - - ]
2637 : :
2638 [ + - ]: 864 : if (verbose > 1)
2639 [ + - + - : 2592 : obatched(clog) << "found mtime=" << b_mtime << " stype=" << b_stype
- - ]
2640 [ + - + - : 864 : << " source0=" << b_source0 << " source1=" << b_source1 << endl;
+ - + - +
- + - +
- ]
2641 : :
2642 : : // Try accessing the located match.
2643 : : // XXX: in case of multiple matches, attempt them in parallel?
2644 [ + - ]: 864 : auto r = handle_buildid_match (conn ? false : true,
2645 : : b_mtime, b_stype, b_source0, b_source1,
2646 : : section, result_fd);
2647 [ + + ]: 864 : if (r)
2648 [ + + ]: 802 : return r;
2649 : :
2650 : : // If a debuginfo file matching BUILDID was found but didn't contain
2651 : : // the desired section, then the section should not exist. Don't
2652 : : // bother querying upstream servers.
2653 [ + + + - : 62 : if (!section.empty () && (sqlite3_column_int (*pp, 4) == 1))
- + ]
2654 : : {
2655 : 4 : struct stat st;
2656 : :
2657 : : // For "F" sourcetype, check if the debuginfo exists. For "R"
2658 : : // sourcetype, check if the debuginfo was interned into the fdcache.
2659 [ - + ]: 6 : if ((b_stype == "F" && (stat (b_source0.c_str (), &st) == 0))
2660 [ + + + - : 6 : || (b_stype == "R" && fdcache.probe (b_source0, b_source1)))
+ - - + ]
2661 : : do_upstream_section_query = false;
2662 : : }
2663 [ + - - + : 1728 : }
+ - - + ]
2664 [ + - ]: 620 : pp->reset();
2665 : :
2666 [ - + ]: 620 : if (!do_upstream_section_query)
2667 [ # # # # ]: 0 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
2668 : :
2669 : : // We couldn't find it in the database. Last ditch effort
2670 : : // is to defer to other debuginfo servers.
2671 : :
2672 : 620 : int fd = -1;
2673 [ + - ]: 620 : debuginfod_client *client = debuginfod_pool_begin ();
2674 [ - + ]: 620 : if (client == NULL)
2675 [ # # # # ]: 0 : throw libc_exception(errno, "debuginfod client pool alloc");
2676 : 620 : defer_dtor<debuginfod_client*,void> client_closer (client, debuginfod_pool_end);
2677 : :
2678 [ + - ]: 620 : debuginfod_set_progressfn (client, & debuginfod_find_progress);
2679 : :
2680 [ + - ]: 620 : if (conn)
2681 [ + + ]: 620 : add_client_federation_headers(client, conn);
2682 : :
2683 [ + + ]: 616 : if (artifacttype == "debuginfo")
2684 [ + - ]: 68 : fd = debuginfod_find_debuginfo (client,
2685 [ + - ]: 68 : (const unsigned char*) buildid.c_str(),
2686 : : 0, NULL);
2687 [ + + ]: 548 : else if (artifacttype == "executable")
2688 [ + - ]: 546 : fd = debuginfod_find_executable (client,
2689 [ + - ]: 546 : (const unsigned char*) buildid.c_str(),
2690 : : 0, NULL);
2691 [ + - ]: 2 : else if (artifacttype == "source")
2692 [ + - ]: 2 : fd = debuginfod_find_source (client,
2693 [ + - ]: 2 : (const unsigned char*) buildid.c_str(),
2694 : : 0, suffix.c_str(), NULL);
2695 [ # # ]: 0 : else if (artifacttype == "section")
2696 [ # # ]: 0 : fd = debuginfod_find_section (client,
2697 [ # # ]: 0 : (const unsigned char*) buildid.c_str(),
2698 : : 0, section.c_str(), NULL);
2699 : :
2700 [ + + ]: 616 : if (fd >= 0)
2701 : : {
2702 [ + - ]: 4 : if (conn != 0)
2703 [ + - + - : 624 : inc_metric ("http_responses_total","result","upstream");
+ - + - -
+ - + - -
- - ]
2704 : 4 : struct stat s;
2705 : 4 : int rc = fstat (fd, &s);
2706 [ + - ]: 4 : if (rc == 0)
2707 : : {
2708 [ + - ]: 4 : auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
2709 [ + - ]: 4 : if (r)
2710 : : {
2711 [ + - ]: 4 : add_mhd_response_header (r, "Content-Type",
2712 : : "application/octet-stream");
2713 : : // Copy the incoming headers
2714 [ + - ]: 4 : const char * hdrs = debuginfod_get_headers(client);
2715 [ + - ]: 4 : string header_dup;
2716 [ + - ]: 4 : if (hdrs)
2717 [ + - - + ]: 4 : header_dup = string(hdrs);
2718 : : // Parse the "header: value\n" lines into (h,v) tuples and pass on
2719 : 20 : while(1)
2720 : : {
2721 : 12 : size_t newline = header_dup.find('\n');
2722 [ + + ]: 12 : if (newline == string::npos) break;
2723 : 8 : size_t colon = header_dup.find(':');
2724 [ + - ]: 8 : if (colon == string::npos) break;
2725 [ + - ]: 8 : string header = header_dup.substr(0,colon);
2726 [ + - ]: 8 : string value = header_dup.substr(colon+1,newline-colon-1);
2727 : : // strip leading spaces from value
2728 : 8 : size_t nonspace = value.find_first_not_of(" ");
2729 [ + - ]: 8 : if (nonspace != string::npos)
2730 [ + - + + ]: 12 : value = value.substr(nonspace);
2731 [ + - ]: 8 : add_mhd_response_header(r, header.c_str(), value.c_str());
2732 [ + - + + : 12 : header_dup = header_dup.substr(newline+1);
+ + - - ]
2733 [ + - ]: 16 : }
2734 : :
2735 [ + - ]: 4 : add_mhd_last_modified (r, s.st_mtime);
2736 [ + - ]: 4 : if (verbose > 1)
2737 [ + - + - : 8 : obatched(clog) << "serving file from upstream debuginfod/cache" << endl;
- - ]
2738 [ + - ]: 4 : if (result_fd)
2739 : 4 : *result_fd = fd;
2740 [ + - ]: 4 : return r; // NB: don't close fd; libmicrohttpd will
2741 : 4 : }
2742 : : }
2743 [ # # ]: 0 : close (fd);
2744 : : }
2745 : : else
2746 [ + + ]: 612 : switch(fd)
2747 : : {
2748 : : case -ENOSYS:
2749 : : break;
2750 : : case -ENOENT:
2751 : : break;
2752 : 532 : default: // some more tricky error
2753 [ + - + - ]: 1064 : throw libc_exception(-fd, "upstream debuginfod query failed");
2754 : : }
2755 : :
2756 [ + - - + ]: 160 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
2757 [ - + - + ]: 2044 : }
2758 : :
2759 : :
2760 : : ////////////////////////////////////////////////////////////////////////
2761 : :
2762 : : static map<string,double> metrics; // arbitrary data for /metrics query
2763 : : // NB: store int64_t since all our metrics are integers; prometheus accepts double
2764 : : static mutex metrics_lock;
2765 : : // NB: these objects get released during the process exit via global dtors
2766 : : // do not call them from within other global dtors
2767 : :
2768 : : // utility function for assembling prometheus-compatible
2769 : : // name="escaped-value" strings
2770 : : // https://prometheus.io/docs/instrumenting/exposition_formats/
2771 : : static string
2772 : 260710 : metric_label(const string& name, const string& value)
2773 : : {
2774 : 260710 : string x = name + "=\"";
2775 [ + + ]: 4216784 : for (auto&& c : value)
2776 [ - - - + ]: 3956137 : switch(c)
2777 : : {
2778 [ # # ]: 0 : case '\\': x += "\\\\"; break;
2779 [ # # ]: 0 : case '\"': x += "\\\""; break;
2780 [ # # ]: 0 : case '\n': x += "\\n"; break;
2781 [ + - ]: 7912254 : default: x += c; break;
2782 : : }
2783 [ + - ]: 260647 : x += "\"";
2784 : 260679 : return x;
2785 : 0 : }
2786 : :
2787 : :
2788 : : // add prometheus-format metric name + label tuple (if any) + value
2789 : :
2790 : : static void
2791 : 1296 : set_metric(const string& metric, double value)
2792 : : {
2793 : 1296 : unique_lock<mutex> lock(metrics_lock);
2794 [ + - ]: 1296 : metrics[metric] = value;
2795 : 1296 : }
2796 : : static void
2797 : 594 : inc_metric(const string& metric)
2798 : : {
2799 : 594 : unique_lock<mutex> lock(metrics_lock);
2800 [ + - ]: 594 : metrics[metric] ++;
2801 : 594 : }
2802 : : static void
2803 : 5512 : set_metric(const string& metric,
2804 : : const string& lname, const string& lvalue,
2805 : : double value)
2806 : : {
2807 [ + - + - : 11024 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
- + - + +
+ - - ]
2808 [ + - ]: 5512 : unique_lock<mutex> lock(metrics_lock);
2809 [ + - ]: 5512 : metrics[key] = value;
2810 [ + - ]: 11024 : }
2811 : :
2812 : : static void
2813 : 115860 : inc_metric(const string& metric,
2814 : : const string& lname, const string& lvalue)
2815 : : {
2816 [ + - + - : 249627 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
- + + + +
+ - - ]
2817 [ + - ]: 115860 : unique_lock<mutex> lock(metrics_lock);
2818 [ + - ]: 115861 : metrics[key] ++;
2819 [ + - ]: 231720 : }
2820 : : static void
2821 : 113700 : add_metric(const string& metric,
2822 : : const string& lname, const string& lvalue,
2823 : : double value)
2824 : : {
2825 [ + - + - : 245311 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
- + + + +
+ - - ]
2826 [ + - ]: 113703 : unique_lock<mutex> lock(metrics_lock);
2827 [ + - ]: 113710 : metrics[key] += value;
2828 [ + - ]: 227418 : }
2829 : : static void
2830 : 594 : add_metric(const string& metric,
2831 : : double value)
2832 : : {
2833 : 594 : unique_lock<mutex> lock(metrics_lock);
2834 [ + - ]: 594 : metrics[metric] += value;
2835 : 594 : }
2836 : :
2837 : :
2838 : : // and more for higher arity labels if needed
2839 : :
2840 : : static void
2841 : 6417 : inc_metric(const string& metric,
2842 : : const string& lname, const string& lvalue,
2843 : : const string& rname, const string& rvalue)
2844 : : {
2845 [ + - - + : 12834 : string key = (metric + "{"
- - ]
2846 [ + - + - : 25668 : + metric_label(lname, lvalue) + ","
- + - + +
+ - - ]
2847 [ + - - + : 19251 : + metric_label(rname, rvalue) + "}");
- + ]
2848 [ + - ]: 6417 : unique_lock<mutex> lock(metrics_lock);
2849 [ + - ]: 6417 : metrics[key] ++;
2850 [ + - ]: 12834 : }
2851 : : static void
2852 : 6417 : add_metric(const string& metric,
2853 : : const string& lname, const string& lvalue,
2854 : : const string& rname, const string& rvalue,
2855 : : double value)
2856 : : {
2857 [ + - - + : 12834 : string key = (metric + "{"
- - ]
2858 [ + - + - : 25668 : + metric_label(lname, lvalue) + ","
- + - + +
+ - - ]
2859 [ + - - + : 19251 : + metric_label(rname, rvalue) + "}");
- + ]
2860 [ + - ]: 6417 : unique_lock<mutex> lock(metrics_lock);
2861 [ + - ]: 6417 : metrics[key] += value;
2862 [ + - ]: 12834 : }
2863 : :
2864 : : static struct MHD_Response*
2865 : 719 : handle_metrics (off_t* size)
2866 : : {
2867 : 719 : stringstream o;
2868 : 719 : {
2869 [ + - ]: 719 : unique_lock<mutex> lock(metrics_lock);
2870 [ + + ]: 75801 : for (auto&& i : metrics)
2871 [ + - ]: 75082 : o << i.first
2872 : : << " "
2873 [ + - + - ]: 75082 : << std::setprecision(std::numeric_limits<double>::digits10 + 1)
2874 [ + - + - ]: 75082 : << i.second
2875 : 75082 : << endl;
2876 : 0 : }
2877 [ + - ]: 719 : const string& os = o.str();
2878 [ + - ]: 719 : MHD_Response* r = MHD_create_response_from_buffer (os.size(),
2879 [ + - ]: 719 : (void*) os.c_str(),
2880 : : MHD_RESPMEM_MUST_COPY);
2881 [ + - ]: 719 : if (r != NULL)
2882 : : {
2883 [ + - ]: 719 : *size = os.size();
2884 [ + - ]: 719 : add_mhd_response_header (r, "Content-Type", "text/plain");
2885 : : }
2886 [ + - ]: 1438 : return r;
2887 : 719 : }
2888 : :
2889 : :
2890 : : static struct MHD_Response*
2891 : 26 : handle_metadata (MHD_Connection* conn,
2892 : : string key, string value, off_t* size)
2893 : : {
2894 : 26 : MHD_Response* r;
2895 : 26 : sqlite3 *thisdb = dbq;
2896 : :
2897 : : // Query locally for matching e, d files
2898 : 26 : string op;
2899 [ + + ]: 26 : if (key == "glob")
2900 [ + - ]: 22 : op = "glob";
2901 [ + - ]: 4 : else if (key == "file")
2902 [ + - ]: 4 : op = "=";
2903 : : else
2904 [ # # # # ]: 0 : throw reportable_exception("/metadata webapi error, unsupported key");
2905 : :
2906 : : // Since PR30378, the file names are segmented into two tables. We
2907 : : // could do a glob/= search over the _files_v view that combines
2908 : : // them, but that means that the entire _files_v thing has to be
2909 : : // materialized & scanned to do the query. Slow! Instead, we can
2910 : : // segment the incoming file/glob pattern into dirname / basename
2911 : : // parts, and apply them to the corresponding table. This is done
2912 : : // by splitting the value at the last "/". If absent, the same
2913 : : // convention as is used in register_file_name().
2914 : :
2915 : 26 : string dirname, bname; // basename is a "poisoned" identifier on some distros
2916 : 26 : size_t slash = value.rfind('/');
2917 [ - + ]: 26 : if (slash == std::string::npos) {
2918 [ # # ]: 0 : dirname = "";
2919 [ # # ]: 0 : bname = value;
2920 : : } else {
2921 [ + - - + ]: 26 : dirname = value.substr(0, slash);
2922 [ + - - + ]: 26 : bname = value.substr(slash+1);
2923 : : }
2924 : :
2925 : : // NB: further optimization is possible: replacing the 'glob' op
2926 : : // with simple equality, if the corresponding value segment lacks
2927 : : // metacharacters. sqlite may or may not be smart enough to do so,
2928 : : // so we help out.
2929 [ + - - - ]: 26 : string metacharacters = "[]*?";
2930 [ + + + + : 48 : string dop = (op == "glob" && dirname.find_first_of(metacharacters) == string::npos) ? "=" : op;
+ - + - -
- ]
2931 [ + + - + : 48 : string bop = (op == "glob" && bname.find_first_of(metacharacters) == string::npos) ? "=" : op;
- - + - -
- ]
2932 : :
2933 : 26 : string sql = string(
2934 : : // explicit query r_de and f_de once here, rather than the query_d and query_e
2935 : : // separately, because they scan the same tables, so we'd double the work
2936 : : "select d1.executable_p, d1.debuginfo_p, 0 as source_p, "
2937 : : " b1.hex, f1d.name || '/' || f1b.name as file, a1.name as archive "
2938 : : "from " BUILDIDS "_r_de d1, " BUILDIDS "_files f1, " BUILDIDS "_fileparts f1b, " BUILDIDS "_fileparts f1d, "
2939 : : BUILDIDS "_buildids b1, " BUILDIDS "_files_v a1 "
2940 : : "where f1.id = d1.content and a1.id = d1.file and d1.buildid = b1.id "
2941 [ + - + - : 78 : " and f1d.name " + dop + " ? and f1b.name " + bop + " ? and f1.dirname = f1d.id and f1.basename = f1b.id "
- + - + -
+ ]
2942 : : "union all \n"
2943 : : "select d2.executable_p, d2.debuginfo_p, 0, "
2944 : : " b2.hex, f2d.name || '/' || f2b.name, NULL "
2945 : : "from " BUILDIDS "_f_de d2, " BUILDIDS "_files f2, " BUILDIDS "_fileparts f2b, " BUILDIDS "_fileparts f2d, "
2946 : : BUILDIDS "_buildids b2 "
2947 : : "where f2.id = d2.file and d2.buildid = b2.id "
2948 [ + - + - : 78 : " and f2d.name " + dop + " ? and f2b.name " + bop + " ? "
- + - + -
+ - - ]
2949 [ - + ]: 26 : " and f2.dirname = f2d.id and f2.basename = f2b.id");
2950 : :
2951 : : // NB: we could query source file names too, thusly:
2952 : : //
2953 : : // select * from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f1, " BUILDIDS "_r_sref sr
2954 : : // where b.id = sr.buildid and f1.id = sr.artifactsrc and f1.name " + op + "?"
2955 : : // UNION ALL something with BUILDIDS "_f_s"
2956 : : //
2957 : : // But the first part of this query cannot run fast without the same index temp-created
2958 : : // during "maxigroom":
2959 : : // create index " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);
2960 : : // and unfortunately this index is HUGE. It's similar to the size of the _r_sref
2961 : : // table, which is already the largest part of a debuginfod index. Adding that index
2962 : : // would nearly double the .sqlite db size.
2963 : :
2964 [ + - + - : 26 : sqlite_ps *pp = new sqlite_ps (thisdb, "mhd-query-meta-glob", sql);
+ - + - ]
2965 [ + - ]: 26 : pp->reset();
2966 [ + - ]: 26 : pp->bind(1, dirname);
2967 [ + - ]: 26 : pp->bind(2, bname);
2968 [ + - ]: 26 : pp->bind(3, dirname);
2969 [ + - ]: 26 : pp->bind(4, bname);
2970 [ + - ]: 26 : unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
2971 : :
2972 [ + - ]: 26 : json_object *metadata = json_object_new_object();
2973 [ - + - - : 26 : if (!metadata) throw libc_exception(ENOMEM, "json allocation");
- - ]
2974 : 26 : defer_dtor<json_object*,int> metadata_d(metadata, json_object_put);
2975 [ + - ]: 26 : json_object *metadata_arr = json_object_new_array();
2976 [ - + - - : 26 : if (!metadata_arr) throw libc_exception(ENOMEM, "json allocation");
- - ]
2977 [ + - ]: 26 : json_object_object_add(metadata, "results", metadata_arr);
2978 : : // consume all the rows
2979 : 26 : struct timespec ts_start;
2980 : 26 : clock_gettime (CLOCK_MONOTONIC, &ts_start);
2981 : :
2982 : 26 : int rc;
2983 : 26 : bool metadata_complete = true;
2984 [ + - + + ]: 68 : while (SQLITE_DONE != (rc = pp->step()))
2985 : : {
2986 : : // break out of loop if we have searched too long
2987 : 16 : struct timespec ts_end;
2988 : 16 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
2989 : 16 : double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
2990 [ + - - + ]: 16 : if (metadata_maxtime_s > 0 && deltas > metadata_maxtime_s)
2991 : : {
2992 : 0 : metadata_complete = false;
2993 : 0 : break;
2994 : : }
2995 : :
2996 [ - + - - : 16 : if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step");
- - ]
2997 : :
2998 [ + - ]: 16 : int m_executable_p = sqlite3_column_int (*pp, 0);
2999 [ + - ]: 16 : int m_debuginfo_p = sqlite3_column_int (*pp, 1);
3000 [ + - ]: 16 : int m_source_p = sqlite3_column_int (*pp, 2);
3001 [ + - - + : 16 : string m_buildid = (const char*) sqlite3_column_text (*pp, 3) ?: ""; // should always be non-null
+ - ]
3002 [ + - - + : 16 : string m_file = (const char*) sqlite3_column_text (*pp, 4) ?: "";
+ - - - ]
3003 [ + - - + : 16 : string m_archive = (const char*) sqlite3_column_text (*pp, 5) ?: "";
+ - - - ]
3004 : :
3005 : : // Confirm that m_file matches in the fnmatch(FNM_PATHNAME)
3006 : : // sense, since sqlite's GLOB operator is a looser filter.
3007 [ + - + - : 16 : if (key == "glob" && fnmatch(value.c_str(), m_file.c_str(), FNM_PATHNAME) != 0)
- + ]
3008 [ # # ]: 0 : continue;
3009 : :
3010 : 48 : auto add_metadata = [metadata_arr, m_buildid, m_file, m_archive](const string& type) {
3011 : 16 : json_object* entry = json_object_new_object();
3012 [ - + - - : 16 : if (NULL == entry) throw libc_exception (ENOMEM, "cannot allocate json");
- - ]
3013 : 16 : defer_dtor<json_object*,int> entry_d(entry, json_object_put);
3014 : :
3015 : 144 : auto add_entry_metadata = [entry](const char* k, string v) {
3016 : 64 : json_object* s;
3017 [ + - ]: 64 : if(v != "") {
3018 : 64 : s = json_object_new_string(v.c_str());
3019 [ - + - - : 64 : if (NULL == s) throw libc_exception (ENOMEM, "cannot allocate json");
- - ]
3020 : 64 : json_object_object_add(entry, k, s);
3021 : : }
3022 : 64 : };
3023 : :
3024 [ + - + - ]: 16 : add_entry_metadata("type", type.c_str());
3025 [ + - + - ]: 16 : add_entry_metadata("buildid", m_buildid);
3026 [ + - + - ]: 16 : add_entry_metadata("file", m_file);
3027 [ + - + - : 32 : if (m_archive != "") add_entry_metadata("archive", m_archive);
+ - ]
3028 [ - + ]: 16 : if (verbose > 3)
3029 [ # # ]: 0 : obatched(clog) << "metadata found local "
3030 : : << json_object_to_json_string_ext(entry,
3031 [ # # # # : 0 : JSON_C_TO_STRING_PRETTY)
# # ]
3032 : 0 : << endl;
3033 : :
3034 : : // Increase ref count to switch its ownership
3035 [ + - + - ]: 16 : json_object_array_add(metadata_arr, json_object_get(entry));
3036 [ + - + - : 32 : };
+ - ]
3037 : :
3038 [ + - + - : 32 : if (m_executable_p) add_metadata("executable");
+ - ]
3039 [ - + - - : 16 : if (m_debuginfo_p) add_metadata("debuginfo");
- - ]
3040 [ - + - - : 16 : if (m_source_p) add_metadata("source");
- - ]
3041 [ - - - - : 64 : }
+ - + - +
- ]
3042 [ + - ]: 26 : pp->reset();
3043 : :
3044 [ + - ]: 26 : unsigned num_local_results = json_object_array_length(metadata_arr);
3045 : :
3046 : : // Query upstream as well
3047 [ + - ]: 26 : debuginfod_client *client = debuginfod_pool_begin();
3048 [ + - ]: 26 : if (client != NULL)
3049 : : {
3050 [ + - ]: 26 : add_client_federation_headers(client, conn);
3051 : :
3052 : 26 : int upstream_metadata_fd;
3053 : 26 : char *upstream_metadata_file = NULL;
3054 [ + - ]: 26 : upstream_metadata_fd = debuginfod_find_metadata(client, key.c_str(), (char*)value.c_str(),
3055 : : &upstream_metadata_file);
3056 [ + + ]: 26 : if (upstream_metadata_fd >= 0) {
3057 : : /* json-c >= 0.13 has json_object_from_fd(). */
3058 [ + - ]: 16 : json_object *upstream_metadata_json = json_object_from_file(upstream_metadata_file);
3059 : 16 : free (upstream_metadata_file);
3060 : 16 : json_object *upstream_metadata_json_arr;
3061 : 16 : json_object *upstream_complete;
3062 [ - + ]: 16 : if (NULL != upstream_metadata_json &&
3063 [ + - + - : 32 : json_object_object_get_ex(upstream_metadata_json, "results", &upstream_metadata_json_arr) &&
- + ]
3064 [ + - ]: 16 : json_object_object_get_ex(upstream_metadata_json, "complete", &upstream_complete))
3065 : : {
3066 [ + - ]: 16 : metadata_complete &= json_object_get_boolean(upstream_complete);
3067 [ + - + + ]: 20 : for (int i = 0, n = json_object_array_length(upstream_metadata_json_arr); i < n; i++)
3068 : : {
3069 [ + - ]: 4 : json_object *entry = json_object_array_get_idx(upstream_metadata_json_arr, i);
3070 [ - + ]: 4 : if (verbose > 3)
3071 [ # # ]: 0 : obatched(clog) << "metadata found remote "
3072 : : << json_object_to_json_string_ext(entry,
3073 [ # # # # : 0 : JSON_C_TO_STRING_PRETTY)
# # ]
3074 : 0 : << endl;
3075 : :
3076 [ + - ]: 4 : json_object_get(entry); // increment reference count
3077 [ + - ]: 4 : json_object_array_add(metadata_arr, entry);
3078 : : }
3079 [ + - ]: 16 : json_object_put(upstream_metadata_json);
3080 : : }
3081 [ + - ]: 16 : close(upstream_metadata_fd);
3082 : : }
3083 [ + - ]: 26 : debuginfod_pool_end (client);
3084 : : }
3085 : :
3086 [ + - ]: 26 : unsigned num_total_results = json_object_array_length(metadata_arr);
3087 : :
3088 [ + - ]: 26 : if (verbose > 2)
3089 [ + - + - ]: 78 : obatched(clog) << "metadata found local=" << num_local_results
3090 [ + - + - ]: 26 : << " remote=" << (num_total_results-num_local_results)
3091 [ + - + - : 26 : << " total=" << num_total_results
+ - ]
3092 : 26 : << endl;
3093 : :
3094 [ + - + - ]: 26 : json_object_object_add(metadata, "complete", json_object_new_boolean(metadata_complete));
3095 [ + - ]: 26 : const char* metadata_str = json_object_to_json_string(metadata);
3096 [ - + ]: 26 : if (!metadata_str)
3097 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate json");
3098 [ + - ]: 26 : r = MHD_create_response_from_buffer (strlen(metadata_str),
3099 : : (void*) metadata_str,
3100 : : MHD_RESPMEM_MUST_COPY);
3101 : 26 : *size = strlen(metadata_str);
3102 [ + - ]: 26 : if (r)
3103 [ + - ]: 26 : add_mhd_response_header(r, "Content-Type", "application/json");
3104 : 26 : return r;
3105 [ + - - + : 52 : }
- + - + -
+ - + -
+ ]
3106 : :
3107 : :
3108 : : static struct MHD_Response*
3109 : 0 : handle_root (off_t* size)
3110 : : {
3111 [ # # # # : 0 : static string version = "debuginfod (" + string (PACKAGE_NAME) + ") "
# # # # #
# # # ]
3112 [ # # # # : 0 : + string (PACKAGE_VERSION);
# # # # #
# ]
3113 : 0 : MHD_Response* r = MHD_create_response_from_buffer (version.size (),
3114 : 0 : (void *) version.c_str (),
3115 : : MHD_RESPMEM_PERSISTENT);
3116 [ # # ]: 0 : if (r != NULL)
3117 : : {
3118 : 0 : *size = version.size ();
3119 : 0 : add_mhd_response_header (r, "Content-Type", "text/plain");
3120 : : }
3121 : 0 : return r;
3122 : : }
3123 : :
3124 : :
3125 : : ////////////////////////////////////////////////////////////////////////
3126 : :
3127 : :
3128 : : /* libmicrohttpd callback */
3129 : : static MHD_RESULT
3130 : 4277 : handler_cb (void * /*cls*/,
3131 : : struct MHD_Connection *connection,
3132 : : const char *url,
3133 : : const char *method,
3134 : : const char * /*version*/,
3135 : : const char * /*upload_data*/,
3136 : : size_t * /*upload_data_size*/,
3137 : : void ** ptr)
3138 : : {
3139 : 4277 : struct MHD_Response *r = NULL;
3140 : 4277 : string url_copy = url;
3141 : :
3142 : : /* libmicrohttpd always makes (at least) two callbacks: once just
3143 : : past the headers, and one after the request body is finished
3144 : : being received. If we process things early (first callback) and
3145 : : queue a response, libmicrohttpd would suppress http keep-alive
3146 : : (via connection->read_closed = true). */
3147 : 4277 : static int aptr; /* just some random object to use as a flag */
3148 [ + + ]: 4277 : if (&aptr != *ptr)
3149 : : {
3150 : : /* do never respond on first call */
3151 : 2139 : *ptr = &aptr;
3152 : 2139 : return MHD_YES;
3153 : : }
3154 : 2138 : *ptr = NULL; /* reset when done */
3155 : :
3156 [ + - ]: 2138 : const char *maxsize_string = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "X-DEBUGINFOD-MAXSIZE");
3157 : 2139 : long maxsize = 0;
3158 [ + + + - ]: 2139 : if (maxsize_string != NULL && maxsize_string[0] != '\0')
3159 : 2 : maxsize = atol(maxsize_string);
3160 : : else
3161 : : maxsize = 0;
3162 : :
3163 : : #if MHD_VERSION >= 0x00097002
3164 : 2139 : enum MHD_Result rc;
3165 : : #else
3166 : : int rc = MHD_NO; // mhd
3167 : : #endif
3168 : 2139 : int http_code = 500;
3169 : 2139 : off_t http_size = -1;
3170 : 2139 : struct timespec ts_start, ts_end;
3171 : 2139 : clock_gettime (CLOCK_MONOTONIC, &ts_start);
3172 : 2139 : double afteryou = 0.0;
3173 [ + - ]: 2139 : string artifacttype, suffix;
3174 : 2139 : string urlargs; // for logging
3175 : :
3176 : 2139 : try
3177 : : {
3178 [ + - - + : 4912 : if (string(method) != "GET")
- + ]
3179 [ # # # # ]: 0 : throw reportable_exception(400, "we support GET only");
3180 : :
3181 : : /* Start decoding the URL. */
3182 : 2139 : size_t slash1 = url_copy.find('/', 1);
3183 [ + - ]: 2139 : string url1 = url_copy.substr(0, slash1); // ok even if slash1 not found
3184 : :
3185 [ + + + # ]: 3523 : if (slash1 != string::npos && url1 == "/buildid")
3186 : : {
3187 : : // PR27863: block this thread awhile if another thread is already busy
3188 : : // fetching the exact same thing. This is better for Everyone.
3189 : : // The latecomer says "... after you!" and waits.
3190 [ + - + - : 3402 : add_metric ("thread_busy", "role", "http-buildid-after-you", 1);
+ - + - -
+ + - - -
- - ]
3191 : : #ifdef HAVE_PTHREAD_SETNAME_NP
3192 : 1388 : (void) pthread_setname_np (pthread_self(), "mhd-buildid-after-you");
3193 : : #endif
3194 : 1388 : struct timespec tsay_start, tsay_end;
3195 : 1388 : clock_gettime (CLOCK_MONOTONIC, &tsay_start);
3196 [ + + + - ]: 1388 : static unique_set<string> busy_urls;
3197 [ + - ]: 1388 : unique_set_reserver<string> after_you(busy_urls, url_copy);
3198 : 1388 : clock_gettime (CLOCK_MONOTONIC, &tsay_end);
3199 : 1388 : afteryou = (tsay_end.tv_sec - tsay_start.tv_sec) + (tsay_end.tv_nsec - tsay_start.tv_nsec)/1.e9;
3200 [ + - + - : 2776 : add_metric ("thread_busy", "role", "http-buildid-after-you", -1);
+ - + - -
+ + - - -
- - ]
3201 : :
3202 [ + - + - : 2776 : tmp_inc_metric m ("thread_busy", "role", "http-buildid");
+ - + - -
+ - + - -
- - ]
3203 : : #ifdef HAVE_PTHREAD_SETNAME_NP
3204 : 1388 : (void) pthread_setname_np (pthread_self(), "mhd-buildid");
3205 : : #endif
3206 : 1388 : size_t slash2 = url_copy.find('/', slash1+1);
3207 [ - + ]: 1388 : if (slash2 == string::npos)
3208 [ # # # # ]: 0 : throw reportable_exception("/buildid/ webapi error, need buildid");
3209 : :
3210 [ + - ]: 1388 : string buildid = url_copy.substr(slash1+1, slash2-slash1-1);
3211 : :
3212 : 1388 : size_t slash3 = url_copy.find('/', slash2+1);
3213 : :
3214 [ + + ]: 1388 : if (slash3 == string::npos)
3215 : : {
3216 [ + - - + ]: 1324 : artifacttype = url_copy.substr(slash2+1);
3217 [ + - ]: 1324 : suffix = "";
3218 : : }
3219 : : else
3220 : : {
3221 [ + - - + ]: 64 : artifacttype = url_copy.substr(slash2+1, slash3-slash2-1);
3222 [ + - - + : 690 : suffix = url_copy.substr(slash3); // include the slash in the suffix
+ + ]
3223 : : }
3224 : :
3225 : : // get the resulting fd so we can report its size
3226 : 1388 : int fd;
3227 [ + + ]: 1388 : r = handle_buildid(connection, buildid, artifacttype, suffix, &fd);
3228 [ + - ]: 762 : if (r)
3229 : : {
3230 : 762 : struct stat fs;
3231 [ + - ]: 762 : if (fstat(fd, &fs) == 0)
3232 : 762 : http_size = fs.st_size;
3233 : : // libmicrohttpd will close (fd);
3234 : : }
3235 : 2014 : }
3236 [ + + ]: 749 : else if (url1 == "/metrics")
3237 : : {
3238 [ + - + - : 1438 : tmp_inc_metric m ("thread_busy", "role", "http-metrics");
+ - + - -
+ - + - -
- - ]
3239 [ + - ]: 719 : artifacttype = "metrics";
3240 [ + - + - : 1438 : inc_metric("http_requests_total", "type", artifacttype);
+ - - + -
- ]
3241 [ + - ]: 719 : r = handle_metrics(& http_size);
3242 : 719 : }
3243 [ + + ]: 32 : else if (url1 == "/metadata")
3244 : : {
3245 [ + - + - : 52 : tmp_inc_metric m ("thread_busy", "role", "http-metadata");
+ - + - -
+ - + - -
- - ]
3246 [ + - ]: 26 : const char* key = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "key");
3247 [ + - ]: 26 : const char* value = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "value");
3248 [ - + ]: 26 : if (NULL == value || NULL == key)
3249 [ # # # # ]: 0 : throw reportable_exception("/metadata webapi error, need key and value");
3250 : :
3251 [ + - + - : 26 : urlargs = string("?key=") + string(key) + string("&value=") + string(value); // apprx., for logging
+ - + - +
- + - + -
- + - + -
+ - + - +
- + + + -
- - - -
- ]
3252 [ + - ]: 26 : artifacttype = "metadata";
3253 [ + - + - : 52 : inc_metric("http_requests_total", "type", artifacttype);
+ - - + -
- ]
3254 [ + - + - : 26 : r = handle_metadata(connection, key, value, &http_size);
+ - - + +
+ - - ]
3255 : 26 : }
3256 [ - + ]: 6 : else if (url1 == "/")
3257 : : {
3258 [ # # ]: 0 : artifacttype = "/";
3259 [ - - - - : 634 : inc_metric("http_requests_total", "type", artifacttype);
- - - - -
- - + ]
3260 [ # # ]: 0 : r = handle_root(& http_size);
3261 : : }
3262 : : else
3263 [ + - + - : 18 : throw reportable_exception("webapi error, unrecognized '" + url1 + "'");
+ - - + ]
3264 : :
3265 [ - + ]: 1507 : if (r == 0)
3266 [ # # # # ]: 0 : throw reportable_exception("internal error, missing response");
3267 : :
3268 [ + + + - ]: 1507 : if (maxsize > 0 && http_size > maxsize)
3269 : : {
3270 [ + - ]: 2 : MHD_destroy_response(r);
3271 [ + - + - : 6 : throw reportable_exception(406, "File too large, max size=" + std::to_string(maxsize));
+ - - + ]
3272 : : }
3273 : :
3274 [ + - ]: 1505 : rc = MHD_queue_response (connection, MHD_HTTP_OK, r);
3275 : 1505 : http_code = MHD_HTTP_OK;
3276 [ + - ]: 1505 : MHD_destroy_response (r);
3277 : 2139 : }
3278 [ - + ]: 634 : catch (const reportable_exception& e)
3279 : : {
3280 [ + - + - : 1268 : inc_metric("http_responses_total","result","error");
+ - + - -
+ - + - -
- - ]
3281 [ + - ]: 634 : e.report(clog);
3282 : 634 : http_code = e.code;
3283 [ + - ]: 634 : http_size = e.message.size();
3284 [ + - ]: 634 : rc = e.mhd_send_response (connection);
3285 : 634 : }
3286 : :
3287 : 2139 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
3288 : 2139 : double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
3289 : : // afteryou: delay waiting for other client's identical query to complete
3290 : : // deltas: total latency, including afteryou waiting
3291 [ + - + - : 4278 : obatched(clog) << conninfo(connection)
- - ]
3292 : : << ' ' << method << ' ' << url << urlargs
3293 [ + - + - : 2139 : << ' ' << http_code << ' ' << http_size
+ - + - +
- + - + -
+ - ]
3294 [ + - + - : 2139 : << ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
+ - + - +
- + - +
- ]
3295 [ + - ]: 2139 : << endl;
3296 : :
3297 : : // related prometheus metrics
3298 : 2139 : string http_code_str = to_string(http_code);
3299 [ + - + - : 4278 : add_metric("http_responses_transfer_bytes_sum",
+ - + - -
+ - + - -
- - ]
3300 : : "code", http_code_str, "type", artifacttype, http_size);
3301 [ + - + - : 4278 : inc_metric("http_responses_transfer_bytes_count",
+ - + - -
+ - + - -
- - ]
3302 : : "code", http_code_str, "type", artifacttype);
3303 : :
3304 [ + - + - : 4278 : add_metric("http_responses_duration_milliseconds_sum",
+ - + - -
+ - + - -
- - ]
3305 : : "code", http_code_str, "type", artifacttype, deltas*1000); // prometheus prefers _seconds and floating point
3306 [ + - + - : 4278 : inc_metric("http_responses_duration_milliseconds_count",
+ - + - -
+ - + - -
- - ]
3307 : : "code", http_code_str, "type", artifacttype);
3308 : :
3309 [ + - + - : 4278 : add_metric("http_responses_after_you_milliseconds_sum",
+ - + - -
+ - + - -
- - ]
3310 : : "code", http_code_str, "type", artifacttype, afteryou*1000);
3311 [ + - + - : 4278 : inc_metric("http_responses_after_you_milliseconds_count",
+ - + - -
+ - + - -
- - - - ]
3312 : : "code", http_code_str, "type", artifacttype);
3313 : :
3314 [ - + ]: 2139 : return rc;
3315 [ + + + + : 9275 : }
- + + + ]
3316 : :
3317 : :
3318 : : ////////////////////////////////////////////////////////////////////////
3319 : : // borrowed originally from src/nm.c get_local_names()
3320 : :
3321 : : static void
3322 : 218 : dwarf_extract_source_paths (Elf *elf, set<string>& debug_sourcefiles)
3323 : : noexcept // no exceptions - so we can simplify the altdbg resource release at end
3324 : : {
3325 : 218 : Dwarf* dbg = dwarf_begin_elf (elf, DWARF_C_READ, NULL);
3326 [ - + ]: 218 : if (dbg == NULL)
3327 : 0 : return;
3328 : :
3329 : 218 : Dwarf* altdbg = NULL;
3330 : 218 : int altdbg_fd = -1;
3331 : :
3332 : : // DWZ handling: if we have an unsatisfied debug-alt-link, add an
3333 : : // empty string into the outgoing sourcefiles set, so the caller
3334 : : // should know that our data is incomplete.
3335 : 218 : const char *alt_name_p;
3336 : 218 : const void *alt_build_id; // elfutils-owned memory
3337 : 218 : ssize_t sz = dwelf_dwarf_gnu_debugaltlink (dbg, &alt_name_p, &alt_build_id);
3338 [ + + ]: 218 : if (sz > 0) // got one!
3339 : : {
3340 : 44 : string buildid;
3341 : 44 : unsigned char* build_id_bytes = (unsigned char*) alt_build_id;
3342 [ + + ]: 924 : for (ssize_t idx=0; idx<sz; idx++)
3343 : : {
3344 : 880 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
3345 : 880 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
3346 : : }
3347 : :
3348 [ + + ]: 44 : if (verbose > 3)
3349 : 40 : obatched(clog) << "Need altdebug buildid=" << buildid << endl;
3350 : :
3351 : : // but is it unsatisfied the normal elfutils ways?
3352 : 44 : Dwarf* alt = dwarf_getalt (dbg);
3353 [ + - ]: 44 : if (alt == NULL)
3354 : : {
3355 : : // Yup, unsatisfied the normal way. Maybe we can satisfy it
3356 : : // from our own debuginfod database.
3357 : 44 : int alt_fd;
3358 : 44 : struct MHD_Response *r = 0;
3359 : 44 : try
3360 : : {
3361 [ + - ]: 44 : string artifacttype = "debuginfo";
3362 [ + - + - : 44 : r = handle_buildid (0, buildid, artifacttype, "", &alt_fd);
- + - + -
- ]
3363 : 0 : }
3364 [ - - ]: 0 : catch (const reportable_exception& e)
3365 : : {
3366 : : // swallow exceptions
3367 : 0 : }
3368 : :
3369 : : // NB: this is not actually recursive! This invokes the web-query
3370 : : // path, which cannot get back into the scan code paths.
3371 [ + - ]: 44 : if (r)
3372 : : {
3373 : : // Found it!
3374 : 44 : altdbg_fd = dup(alt_fd); // ok if this fails, downstream failures ok
3375 : 44 : alt = altdbg = dwarf_begin (altdbg_fd, DWARF_C_READ);
3376 : : // NB: must close this dwarf and this fd at the bottom of the function!
3377 : 44 : MHD_destroy_response (r); // will close alt_fd
3378 [ + - ]: 44 : if (alt)
3379 : 44 : dwarf_setalt (dbg, alt);
3380 : : }
3381 : : }
3382 : : else
3383 : : {
3384 : : // NB: dwarf_setalt(alt) inappropriate - already done!
3385 : : // NB: altdbg will stay 0 so nothing tries to redundantly dealloc.
3386 : : }
3387 : :
3388 [ + - ]: 44 : if (alt)
3389 : : {
3390 [ + + ]: 44 : if (verbose > 3)
3391 : 40 : obatched(clog) << "Resolved altdebug buildid=" << buildid << endl;
3392 : : }
3393 : : else // (alt == NULL) - signal possible presence of poor debuginfo
3394 : : {
3395 [ # # ]: 0 : debug_sourcefiles.insert("");
3396 [ # # ]: 0 : if (verbose > 3)
3397 : 0 : obatched(clog) << "Unresolved altdebug buildid=" << buildid << endl;
3398 : : }
3399 : 44 : }
3400 : :
3401 : 218 : Dwarf_Off offset = 0;
3402 : 218 : Dwarf_Off old_offset;
3403 : 218 : size_t hsize;
3404 : :
3405 [ + + ]: 2392 : while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
3406 : : {
3407 : 2174 : Dwarf_Die cudie_mem;
3408 : 2174 : Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
3409 : :
3410 [ - + ]: 2174 : if (cudie == NULL)
3411 : 22 : continue;
3412 [ + + ]: 2174 : if (dwarf_tag (cudie) != DW_TAG_compile_unit)
3413 : 22 : continue;
3414 : :
3415 [ - + ]: 2152 : const char *cuname = dwarf_diename(cudie) ?: "unknown";
3416 : :
3417 : 2152 : Dwarf_Files *files;
3418 : 2152 : size_t nfiles;
3419 [ - + ]: 2152 : if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
3420 : 0 : continue;
3421 : :
3422 : : // extract DW_AT_comp_dir to resolve relative file names
3423 : 2152 : const char *comp_dir = "";
3424 : 2152 : const char *const *dirs;
3425 : 2152 : size_t ndirs;
3426 [ - + ]: 2152 : if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 &&
3427 [ - + ]: 2152 : dirs[0] != NULL)
3428 : : comp_dir = dirs[0];
3429 : : if (comp_dir == NULL)
3430 : : comp_dir = "";
3431 : :
3432 [ + + ]: 2152 : if (verbose > 3)
3433 : 328 : obatched(clog) << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir
3434 : 164 : << " #files=" << nfiles << " #dirs=" << ndirs << endl;
3435 : :
3436 [ + - - - ]: 2152 : if (comp_dir[0] == '\0' && cuname[0] != '/')
3437 : : {
3438 [ # # ]: 0 : if (verbose > 3)
3439 : 0 : obatched(clog) << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
3440 : 0 : continue;
3441 : : }
3442 : :
3443 [ + + ]: 33722 : for (size_t f = 1; f < nfiles; f++)
3444 : : {
3445 : 31570 : const char *hat = dwarf_filesrc (files, f, NULL, NULL);
3446 [ - + ]: 31570 : if (hat == NULL)
3447 : 0 : continue;
3448 : :
3449 [ + + - + ]: 60266 : if (string(hat) == "<built-in>") // gcc intrinsics, don't bother record
3450 : 0 : continue;
3451 : :
3452 [ + + ]: 31570 : string waldo;
3453 [ + + ]: 31570 : if (hat[0] == '/') // absolute
3454 [ - + ]: 19892 : waldo = (string (hat));
3455 [ + - ]: 11678 : else if (comp_dir[0] != '\0') // comp_dir relative
3456 [ - + - + : 20482 : waldo = (string (comp_dir) + string("/") + string (hat));
- + - + +
+ ]
3457 : : else
3458 : : {
3459 [ # # ]: 0 : if (verbose > 3)
3460 : 0 : obatched(clog) << "skipping hat=" << hat << " due to empty comp_dir" << endl;
3461 [ # # ]: 0 : continue;
3462 : : }
3463 : :
3464 : : // NB: this is the 'waldo' that a dbginfo client will have
3465 : : // to supply for us to give them the file The comp_dir
3466 : : // prefixing is a definite complication. Otherwise we'd
3467 : : // have to return a setof comp_dirs (one per CU!) with
3468 : : // corresponding filesrc[] names, instead of one absolute
3469 : : // resoved set. Maybe we'll have to do that anyway. XXX
3470 : :
3471 [ + + ]: 31570 : if (verbose > 4)
3472 [ - + ]: 100 : obatched(clog) << waldo
3473 [ - + ]: 50 : << (debug_sourcefiles.find(waldo)==debug_sourcefiles.end() ? " new" : " dup") << endl;
3474 : :
3475 [ + - ]: 31570 : debug_sourcefiles.insert (waldo);
3476 : 31570 : }
3477 : : }
3478 : :
3479 : 218 : dwarf_end(dbg);
3480 [ + + ]: 218 : if (altdbg)
3481 : 44 : dwarf_end(altdbg);
3482 [ + + ]: 218 : if (altdbg_fd >= 0)
3483 : 44 : close(altdbg_fd);
3484 : : }
3485 : :
3486 : :
3487 : :
3488 : : static void
3489 : 1059 : elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, set<string>& debug_sourcefiles)
3490 : : {
3491 : 1059 : Elf *elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL);
3492 [ + - ]: 1060 : if (elf == NULL)
3493 : : return;
3494 : :
3495 : 1060 : try // catch our types of errors and clean up the Elf* object
3496 : : {
3497 [ + - + + ]: 1060 : if (elf_kind (elf) != ELF_K_ELF)
3498 : : {
3499 [ + - ]: 632 : elf_end (elf);
3500 : 632 : return;
3501 : : }
3502 : :
3503 : 428 : GElf_Ehdr ehdr_storage;
3504 [ + - ]: 428 : GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
3505 [ - + ]: 428 : if (ehdr == NULL)
3506 : : {
3507 [ # # ]: 0 : elf_end (elf);
3508 : : return;
3509 : : }
3510 : 428 : auto elf_type = ehdr->e_type;
3511 : :
3512 : 428 : const void *build_id; // elfutils-owned memory
3513 [ + - ]: 428 : ssize_t sz = dwelf_elf_gnu_build_id (elf, & build_id);
3514 [ - + ]: 426 : if (sz <= 0)
3515 : : {
3516 : : // It's not a diagnostic-worthy error for an elf file to lack build-id.
3517 : : // It might just be very old.
3518 [ # # ]: 0 : elf_end (elf);
3519 : : return;
3520 : : }
3521 : :
3522 : : // build_id is a raw byte array; convert to hexadecimal *lowercase*
3523 : 426 : unsigned char* build_id_bytes = (unsigned char*) build_id;
3524 [ + + ]: 8964 : for (ssize_t idx=0; idx<sz; idx++)
3525 : : {
3526 [ + - ]: 8536 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
3527 [ + - ]: 17069 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
3528 : : }
3529 : :
3530 : : // now decide whether it's an executable - namely, any allocatable section has
3531 : : // PROGBITS;
3532 [ + + ]: 428 : if (elf_type == ET_EXEC || elf_type == ET_DYN)
3533 : : {
3534 : 376 : size_t shnum;
3535 [ + - ]: 376 : int rc = elf_getshdrnum (elf, &shnum);
3536 [ - + ]: 376 : if (rc < 0)
3537 [ # # # # ]: 0 : throw elfutils_exception(rc, "getshdrnum");
3538 : :
3539 : 376 : executable_p = false;
3540 [ + + ]: 7226 : for (size_t sc = 0; sc < shnum; sc++)
3541 : : {
3542 [ + - ]: 7044 : Elf_Scn *scn = elf_getscn (elf, sc);
3543 [ - + ]: 7042 : if (scn == NULL)
3544 : 0 : continue;
3545 : :
3546 : 7042 : GElf_Shdr shdr_mem;
3547 [ + - ]: 7042 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
3548 [ - + ]: 7044 : if (shdr == NULL)
3549 : 0 : continue;
3550 : :
3551 : : // allocated (loadable / vm-addr-assigned) section with available content?
3552 [ + + + + ]: 7044 : if ((shdr->sh_type == SHT_PROGBITS) && (shdr->sh_flags & SHF_ALLOC))
3553 : : {
3554 [ + + ]: 194 : if (verbose > 4)
3555 [ + - + - : 32 : obatched(clog) << "executable due to SHF_ALLOC SHT_PROGBITS sc=" << sc << endl;
+ - ]
3556 : 194 : executable_p = true;
3557 : 194 : break; // no need to keep looking for others
3558 : : }
3559 : : } // iterate over sections
3560 : : } // executable_p classification
3561 : :
3562 : : // now decide whether it's a debuginfo - namely, if it has any .debug* or .zdebug* sections
3563 : : // logic mostly stolen from fweimer@redhat.com's elfclassify drafts
3564 : 428 : size_t shstrndx;
3565 [ + - ]: 428 : int rc = elf_getshdrstrndx (elf, &shstrndx);
3566 [ - + ]: 428 : if (rc < 0)
3567 [ # # # # ]: 0 : throw elfutils_exception(rc, "getshdrstrndx");
3568 : :
3569 : : Elf_Scn *scn = NULL;
3570 : : bool symtab_p = false;
3571 : : bool bits_alloc_p = false;
3572 : 22020 : while (true)
3573 : : {
3574 [ + - ]: 11224 : scn = elf_nextscn (elf, scn);
3575 [ + + ]: 11149 : if (scn == NULL)
3576 : : break;
3577 : 10939 : GElf_Shdr shdr_storage;
3578 [ + - ]: 10939 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
3579 [ - + ]: 10941 : if (shdr == NULL)
3580 : : break;
3581 [ + - ]: 10941 : const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
3582 [ - + ]: 11014 : if (section_name == NULL)
3583 : : break;
3584 [ + + ]: 11014 : if (startswith (section_name, ".debug_line") ||
3585 [ - + ]: 10796 : startswith (section_name, ".zdebug_line"))
3586 : : {
3587 : 218 : debuginfo_p = true;
3588 [ + - ]: 218 : if (scan_source_info)
3589 : 218 : dwarf_extract_source_paths (elf, debug_sourcefiles);
3590 : : break; // expecting only one .*debug_line, so no need to look for others
3591 : : }
3592 [ + + ]: 10796 : else if (startswith (section_name, ".debug_") ||
3593 [ + # ]: 10047 : startswith (section_name, ".zdebug_"))
3594 : : {
3595 : 690 : debuginfo_p = true;
3596 : : // NB: don't break; need to parse .debug_line for sources
3597 : : }
3598 [ + + ]: 10106 : else if (shdr->sh_type == SHT_SYMTAB)
3599 : : {
3600 : : symtab_p = true;
3601 : : }
3602 : 10082 : else if (shdr->sh_type != SHT_NOBITS
3603 [ + + ]: 10082 : && shdr->sh_type != SHT_NOTE
3604 [ + + ]: 4850 : && (shdr->sh_flags & SHF_ALLOC) != 0)
3605 : : {
3606 : 10796 : bits_alloc_p = true;
3607 : : }
3608 : 10796 : }
3609 : :
3610 : : // For more expansive elf/split-debuginfo classification, we
3611 : : // want to identify as debuginfo "strip -s"-produced files
3612 : : // without .debug_info* (like libicudata), but we don't want to
3613 : : // identify "strip -g" executables (with .symtab left there).
3614 [ - + ]: 428 : if (symtab_p && !bits_alloc_p)
3615 : 0 : debuginfo_p = true;
3616 : : }
3617 [ # # ]: 0 : catch (const reportable_exception& e)
3618 : : {
3619 [ # # ]: 0 : e.report(clog);
3620 : 0 : }
3621 : 428 : elf_end (elf);
3622 : : }
3623 : :
3624 : :
3625 : : // Intern the given file name in two parts (dirname & basename) and
3626 : : // return the resulting file's id.
3627 : : static int64_t
3628 : 8886 : register_file_name(sqlite_ps& ps_upsert_fileparts,
3629 : : sqlite_ps& ps_upsert_file,
3630 : : sqlite_ps& ps_lookup_file,
3631 : : const string& name)
3632 : : {
3633 : 8886 : std::size_t slash = name.rfind('/');
3634 [ + + ]: 8886 : string dirname, filename;
3635 [ + + ]: 8886 : if (slash == std::string::npos)
3636 : : {
3637 [ + - ]: 90 : dirname = "";
3638 [ + - ]: 90 : filename = name;
3639 : : }
3640 : : else
3641 : : {
3642 [ + - - + ]: 8796 : dirname = name.substr(0, slash);
3643 [ + - - + : 8795 : filename = name.substr(slash+1);
- - ]
3644 : : }
3645 : : // NB: see also handle_metadata()
3646 : :
3647 : : // intern the two substrings
3648 : 8886 : ps_upsert_fileparts
3649 [ + - ]: 8886 : .reset()
3650 [ + - ]: 8886 : .bind(1, dirname)
3651 [ + - ]: 8885 : .step_ok_done();
3652 : 8886 : ps_upsert_fileparts
3653 [ + - ]: 8886 : .reset()
3654 [ + - ]: 8886 : .bind(1, filename)
3655 [ + - ]: 8886 : .step_ok_done();
3656 : :
3657 : : // intern the tuple
3658 : 8886 : ps_upsert_file
3659 [ + - ]: 8886 : .reset()
3660 [ + - ]: 8886 : .bind(1, dirname)
3661 [ + - ]: 8886 : .bind(2, filename)
3662 [ + - ]: 8886 : .step_ok_done();
3663 : :
3664 : : // look up the tuple's id
3665 : 8886 : ps_lookup_file
3666 [ + - ]: 8886 : .reset()
3667 [ + - ]: 8886 : .bind(1, dirname)
3668 [ + - ]: 8886 : .bind(2, filename);
3669 [ + - ]: 8886 : int rc = ps_lookup_file.step();
3670 [ - + - - : 8886 : if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step");
- - ]
3671 : :
3672 [ + - ]: 8886 : int64_t id = sqlite3_column_int64 (ps_lookup_file, 0);
3673 [ + - ]: 8886 : ps_lookup_file.reset();
3674 [ + + ]: 8886 : return id;
3675 [ + + ]: 17032 : }
3676 : :
3677 : :
3678 : :
3679 : : static void
3680 : 728 : scan_source_file (const string& rps, const stat_t& st,
3681 : : sqlite_ps& ps_upsert_buildids,
3682 : : sqlite_ps& ps_upsert_fileparts,
3683 : : sqlite_ps& ps_upsert_file,
3684 : : sqlite_ps& ps_lookup_file,
3685 : : sqlite_ps& ps_upsert_de,
3686 : : sqlite_ps& ps_upsert_s,
3687 : : sqlite_ps& ps_query,
3688 : : sqlite_ps& ps_scan_done,
3689 : : unsigned& fts_cached,
3690 : : unsigned& fts_executable,
3691 : : unsigned& fts_debuginfo,
3692 : : unsigned& fts_sourcefiles)
3693 : : {
3694 : 728 : int64_t fileid = register_file_name(ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, rps);
3695 : :
3696 : : /* See if we know of it already. */
3697 : 728 : int rc = ps_query
3698 : 728 : .reset()
3699 : 728 : .bind(1, fileid)
3700 : 728 : .bind(2, st.st_mtime)
3701 : 728 : .step();
3702 : 728 : ps_query.reset();
3703 [ + + ]: 728 : if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
3704 : : // no need to recheck a file/version we already know
3705 : : // specifically, no need to elf-begin a file we already determined is non-elf
3706 : : // (so is stored with buildid=NULL)
3707 : : {
3708 : 368 : fts_cached++;
3709 : 368 : return;
3710 : : }
3711 : :
3712 : 360 : bool executable_p = false, debuginfo_p = false; // E and/or D
3713 [ + - ]: 360 : string buildid;
3714 [ + - ]: 360 : set<string> sourcefiles;
3715 : :
3716 [ + - ]: 360 : int fd = open (rps.c_str(), O_RDONLY);
3717 : 360 : try
3718 : : {
3719 [ + - ]: 360 : if (fd >= 0)
3720 [ + - ]: 360 : elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
3721 : : else
3722 [ # # # # : 0 : throw libc_exception(errno, string("open ") + rps);
# # # # ]
3723 [ + - + - : 720 : add_metric ("scanned_bytes_total","source","file",
+ - - + -
+ - - -
- ]
3724 [ + - ]: 360 : st.st_size);
3725 [ + - + - : 720 : inc_metric ("scanned_files_total","source","file");
+ - + - -
+ - + - -
- - ]
3726 : : }
3727 : : // NB: we catch exceptions here too, so that we can
3728 : : // cache the corrupt-elf case (!executable_p &&
3729 : : // !debuginfo_p) just below, just as if we had an
3730 : : // EPERM error from open(2).
3731 [ - - ]: 0 : catch (const reportable_exception& e)
3732 : : {
3733 [ - - ]: 0 : e.report(clog);
3734 : 0 : }
3735 : :
3736 [ + - ]: 360 : if (fd >= 0)
3737 [ + - ]: 360 : close (fd);
3738 : :
3739 [ + + ]: 360 : if (buildid == "")
3740 : : {
3741 : : // no point storing an elf file without buildid
3742 : 298 : executable_p = false;
3743 : 298 : debuginfo_p = false;
3744 : : }
3745 : : else
3746 : : {
3747 : : // register this build-id in the interning table
3748 : 62 : ps_upsert_buildids
3749 [ + - ]: 62 : .reset()
3750 [ + - ]: 62 : .bind(1, buildid)
3751 [ + - ]: 62 : .step_ok_done();
3752 : : }
3753 : :
3754 [ + + ]: 360 : if (executable_p)
3755 : 38 : fts_executable ++;
3756 [ + + ]: 360 : if (debuginfo_p)
3757 : 38 : fts_debuginfo ++;
3758 [ + + + + ]: 360 : if (executable_p || debuginfo_p)
3759 : : {
3760 : 62 : ps_upsert_de
3761 [ + - ]: 62 : .reset()
3762 [ + - ]: 62 : .bind(1, buildid)
3763 [ + + + - ]: 86 : .bind(2, debuginfo_p ? 1 : 0)
3764 [ + + + - ]: 86 : .bind(3, executable_p ? 1 : 0)
3765 [ + - ]: 62 : .bind(4, fileid)
3766 [ + - ]: 62 : .bind(5, st.st_mtime)
3767 [ + - ]: 62 : .step_ok_done();
3768 : : }
3769 [ + + ]: 360 : if (executable_p)
3770 [ + - + - : 76 : inc_metric("found_executable_total","source","files");
+ - + - -
+ - + - -
- - ]
3771 [ + + ]: 360 : if (debuginfo_p)
3772 [ + - + - : 76 : inc_metric("found_debuginfo_total","source","files");
+ - + - -
+ - + - -
- - ]
3773 : :
3774 [ + + + - ]: 398 : if (sourcefiles.size() && buildid != "")
3775 : : {
3776 : 38 : fts_sourcefiles += sourcefiles.size();
3777 : :
3778 [ + + ]: 3202 : for (auto&& dwarfsrc : sourcefiles)
3779 : : {
3780 [ + - ]: 3164 : char *srp = realpath(dwarfsrc.c_str(), NULL);
3781 [ + + ]: 3164 : if (srp == NULL) // also if DWZ unresolved dwarfsrc=""
3782 : 36 : continue; // unresolvable files are not a serious problem
3783 : : // throw libc_exception(errno, "fts/file realpath " + srcpath);
3784 [ + - ]: 3128 : string srps = string(srp);
3785 : 3128 : free (srp);
3786 : :
3787 : 3128 : struct stat sfs;
3788 : 3128 : rc = stat(srps.c_str(), &sfs);
3789 [ - + ]: 3128 : if (rc != 0)
3790 [ # # ]: 0 : continue;
3791 : :
3792 [ + - ]: 3128 : if (verbose > 2)
3793 [ + - + - : 9384 : obatched(clog) << "recorded buildid=" << buildid << " file=" << srps
- - ]
3794 [ + - + - : 3128 : << " mtime=" << sfs.st_mtime
+ - + - ]
3795 [ + - + - : 3128 : << " as source " << dwarfsrc << endl;
+ - ]
3796 : :
3797 : : // PR25548: store canonicalized dwarfsrc path
3798 [ + - ]: 3128 : string dwarfsrc_canon = canon_pathname (dwarfsrc);
3799 [ + + ]: 3128 : if (dwarfsrc_canon != dwarfsrc)
3800 : : {
3801 [ + + ]: 520 : if (verbose > 3)
3802 [ + - + - : 20 : obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+ - + - +
- ]
3803 : : }
3804 : :
3805 [ + - ]: 3128 : int64_t fileid1 = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, dwarfsrc_canon);
3806 [ + - ]: 3128 : int64_t fileid2 = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, srps);
3807 : :
3808 : 3128 : ps_upsert_s
3809 [ + - ]: 3128 : .reset()
3810 [ + - ]: 3128 : .bind(1, buildid)
3811 [ + - ]: 3128 : .bind(2, fileid1)
3812 [ + - ]: 3128 : .bind(3, fileid2)
3813 [ + - ]: 3128 : .bind(4, sfs.st_mtime)
3814 [ + - ]: 3128 : .step_ok_done();
3815 : :
3816 [ + - + - : 6256 : inc_metric("found_sourcerefs_total","source","files");
+ - + - -
+ - + + -
- - - - -
- ]
3817 [ + - ]: 6292 : }
3818 : : }
3819 : :
3820 : 360 : ps_scan_done
3821 [ + - ]: 360 : .reset()
3822 [ + - ]: 360 : .bind(1, fileid)
3823 [ + - ]: 360 : .bind(2, st.st_mtime)
3824 [ + - ]: 360 : .bind(3, st.st_size)
3825 [ + - ]: 360 : .step_ok_done();
3826 : :
3827 [ + - ]: 360 : if (verbose > 2)
3828 [ + - + - ]: 1080 : obatched(clog) << "recorded buildid=" << buildid << " file=" << rps
3829 [ + - + - : 360 : << " mtime=" << st.st_mtime << " atype="
+ - + - ]
3830 : : << (executable_p ? "E" : "")
3831 [ + - + + : 1004 : << (debuginfo_p ? "D" : "") << endl;
+ - + + +
- + - ]
3832 [ + + ]: 422 : }
3833 : :
3834 : :
3835 : :
3836 : :
3837 : :
3838 : : // Analyze given archive file of given age; record buildids / exec/debuginfo-ness of its
3839 : : // constituent files with given upsert statements.
3840 : : static void
3841 : 374 : archive_classify (const string& rps, string& archive_extension, int64_t archiveid,
3842 : : sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_fileparts, sqlite_ps& ps_upsert_file,
3843 : : sqlite_ps& ps_lookup_file,
3844 : : sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef,
3845 : : time_t mtime,
3846 : : unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef,
3847 : : bool& fts_sref_complete_p)
3848 : : {
3849 : 374 : string archive_decoder = "/dev/null";
3850 [ + + ]: 928 : for (auto&& arch : scan_archives)
3851 [ + + ]: 554 : if (string_endswith(rps, arch.first))
3852 : : {
3853 [ + - ]: 374 : archive_extension = arch.first;
3854 [ + - ]: 928 : archive_decoder = arch.second;
3855 : : }
3856 : :
3857 : 374 : FILE* fp;
3858 : 374 : defer_dtor<FILE*,int>::dtor_fn dfn;
3859 [ + + ]: 374 : if (archive_decoder != "cat")
3860 : : {
3861 [ + - + - : 72 : string popen_cmd = archive_decoder + " " + shell_escape(rps);
+ - - + -
- - - ]
3862 [ + - ]: 36 : fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
3863 : 36 : dfn = pclose;
3864 [ - + ]: 36 : if (fp == NULL)
3865 [ # # # # : 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
# # # # ]
3866 : 36 : }
3867 : : else
3868 : : {
3869 [ + - ]: 338 : fp = fopen (rps.c_str(), "r");
3870 : 338 : dfn = fclose;
3871 [ - + ]: 338 : if (fp == NULL)
3872 [ # # # # : 0 : throw libc_exception (errno, string("fopen ") + rps);
# # # # ]
3873 : : }
3874 : 374 : defer_dtor<FILE*,int> fp_closer (fp, dfn);
3875 : :
3876 : 374 : struct archive *a;
3877 [ + - ]: 374 : a = archive_read_new();
3878 [ - + ]: 374 : if (a == NULL)
3879 [ # # # # ]: 0 : throw archive_exception("cannot create archive reader");
3880 : 374 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
3881 : :
3882 [ + - ]: 374 : int rc = archive_read_support_format_all(a);
3883 [ - + ]: 374 : if (rc != ARCHIVE_OK)
3884 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all formats");
3885 [ + - ]: 374 : rc = archive_read_support_filter_all(a);
3886 [ - + ]: 374 : if (rc != ARCHIVE_OK)
3887 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all filters");
3888 : :
3889 [ + - ]: 374 : rc = archive_read_open_FILE (a, fp);
3890 [ - + ]: 374 : if (rc != ARCHIVE_OK)
3891 : : {
3892 [ # # # # : 0 : obatched(clog) << "cannot open archive from pipe " << rps << endl;
# # ]
3893 [ # # # # ]: 0 : throw archive_exception(a, "cannot open archive from pipe");
3894 : : }
3895 : :
3896 [ + + ]: 374 : if (verbose > 3)
3897 [ + - + - : 692 : obatched(clog) << "libarchive scanning " << rps << " id " << archiveid << endl;
+ - + - +
- ]
3898 : :
3899 : : bool any_exceptions = false;
3900 : 2666 : while(1) // parse archive entries
3901 : : {
3902 [ + - ]: 2666 : if (interrupted)
3903 : : break;
3904 : :
3905 : 2666 : try
3906 : : {
3907 : 2666 : struct archive_entry *e;
3908 [ + - ]: 2666 : rc = archive_read_next_header (a, &e);
3909 [ + + ]: 2666 : if (rc != ARCHIVE_OK)
3910 : : break;
3911 : :
3912 [ + - + + ]: 2292 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
3913 : 1592 : continue;
3914 : :
3915 [ + - ]: 700 : string fn = canonicalized_archive_entry_pathname (e);
3916 : :
3917 [ + + ]: 700 : if (verbose > 3)
3918 [ + - + - : 1268 : obatched(clog) << "libarchive checking " << fn << endl;
+ - - - ]
3919 : :
3920 : : // extract this file to a temporary file
3921 : 700 : char* tmppath = NULL;
3922 : 700 : rc = asprintf (&tmppath, "%s/debuginfod-classify.XXXXXX", tmpdir.c_str());
3923 [ - + ]: 700 : if (rc < 0)
3924 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
3925 : 700 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
3926 [ + - ]: 700 : int fd = mkstemp (tmppath);
3927 [ - + ]: 700 : if (fd < 0)
3928 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
3929 : 700 : unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
3930 : 700 : defer_dtor<int,int> minifd_closer (fd, close);
3931 : :
3932 [ + - ]: 700 : rc = archive_read_data_into_fd (a, fd);
3933 [ - + ]: 699 : if (rc != ARCHIVE_OK) {
3934 [ # # ]: 0 : close (fd);
3935 [ # # # # ]: 0 : throw archive_exception(a, "cannot extract file");
3936 : : }
3937 : :
3938 : : // finally ... time to run elf_classify on this bad boy and update the database
3939 : 699 : bool executable_p = false, debuginfo_p = false;
3940 [ + - ]: 699 : string buildid;
3941 [ + - ]: 699 : set<string> sourcefiles;
3942 [ + - ]: 699 : elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
3943 : : // NB: might throw
3944 : :
3945 [ + + ]: 700 : if (buildid != "") // intern buildid
3946 : : {
3947 : 366 : ps_upsert_buildids
3948 [ + - ]: 366 : .reset()
3949 [ + - ]: 366 : .bind(1, buildid)
3950 [ + - ]: 366 : .step_ok_done();
3951 : : }
3952 : :
3953 [ + - ]: 700 : int64_t fileid = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, fn);
3954 : :
3955 [ + + ]: 700 : if (sourcefiles.size() > 0) // sref records needed
3956 : : {
3957 : : // NB: we intern each source file once. Once raw, as it
3958 : : // appears in the DWARF file list coming back from
3959 : : // elf_classify() - because it'll end up in the
3960 : : // _norm.artifactsrc column. We don't also put another
3961 : : // version with a '.' at the front, even though that's
3962 : : // how rpm/cpio packs names, because we hide that from
3963 : : // the database for storage efficiency.
3964 : :
3965 [ + + ]: 646 : for (auto&& s : sourcefiles)
3966 : : {
3967 [ - + ]: 488 : if (s == "")
3968 : : {
3969 : 0 : fts_sref_complete_p = false;
3970 : 0 : continue;
3971 : : }
3972 : :
3973 : : // PR25548: store canonicalized source path
3974 : 488 : const string& dwarfsrc = s;
3975 [ + - ]: 488 : string dwarfsrc_canon = canon_pathname (dwarfsrc);
3976 [ + + ]: 488 : if (dwarfsrc_canon != dwarfsrc)
3977 : : {
3978 [ + - ]: 32 : if (verbose > 3)
3979 [ + - + - : 64 : obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+ - + - +
- - - ]
3980 : : }
3981 : :
3982 [ + - ]: 488 : int64_t srcfileid = register_file_name(ps_upsert_fileparts, ps_upsert_file, ps_lookup_file,
3983 : : dwarfsrc_canon);
3984 : :
3985 : 488 : ps_upsert_sref
3986 [ + - ]: 488 : .reset()
3987 [ + - ]: 488 : .bind(1, buildid)
3988 [ + - ]: 488 : .bind(2, srcfileid)
3989 [ + - ]: 488 : .step_ok_done();
3990 : :
3991 [ + - ]: 488 : fts_sref ++;
3992 : 488 : }
3993 : : }
3994 : :
3995 [ + + ]: 700 : if (executable_p)
3996 : 156 : fts_executable ++;
3997 [ + + ]: 700 : if (debuginfo_p)
3998 : 210 : fts_debuginfo ++;
3999 : :
4000 [ + + + + ]: 700 : if (executable_p || debuginfo_p)
4001 : : {
4002 : 366 : ps_upsert_de
4003 [ + - ]: 366 : .reset()
4004 [ + - ]: 366 : .bind(1, buildid)
4005 [ + + + - ]: 522 : .bind(2, debuginfo_p ? 1 : 0)
4006 [ + + + - ]: 576 : .bind(3, executable_p ? 1 : 0)
4007 [ + - ]: 366 : .bind(4, archiveid)
4008 [ + - ]: 366 : .bind(5, mtime)
4009 [ + - ]: 366 : .bind(6, fileid)
4010 [ + - ]: 366 : .step_ok_done();
4011 : : }
4012 : : else // potential source - sdef record
4013 : : {
4014 : 334 : fts_sdef ++;
4015 : 334 : ps_upsert_sdef
4016 [ + - ]: 334 : .reset()
4017 [ + - ]: 334 : .bind(1, archiveid)
4018 [ + - ]: 334 : .bind(2, mtime)
4019 [ + - ]: 334 : .bind(3, fileid)
4020 [ + - ]: 334 : .step_ok_done();
4021 : : }
4022 : :
4023 [ + - + + : 700 : if ((verbose > 2) && (executable_p || debuginfo_p))
+ + ]
4024 [ + - + - ]: 1098 : obatched(clog) << "recorded buildid=" << buildid << " rpm=" << rps << " file=" << fn
4025 [ + - + - : 366 : << " mtime=" << mtime << " atype="
+ - + - +
- + - ]
4026 : : << (executable_p ? "E" : "")
4027 : : << (debuginfo_p ? "D" : "")
4028 [ + - + + : 732 : << " sourcefiles=" << sourcefiles.size() << endl;
+ - + + +
- + - + -
+ - ]
4029 : :
4030 [ + + + + ]: 1596 : }
4031 [ - - ]: 0 : catch (const reportable_exception& e)
4032 : : {
4033 [ - - ]: 0 : e.report(clog);
4034 : 0 : any_exceptions = true;
4035 : : // NB: but we allow the libarchive iteration to continue, in
4036 : : // case we can still gather some useful information. That
4037 : : // would allow some webapi queries to work, until later when
4038 : : // this archive is rescanned. (Its vitals won't go into the
4039 : : // _file_mtime_scanned table until after a successful scan.)
4040 : 0 : }
4041 : : }
4042 : :
4043 [ - + ]: 374 : if (any_exceptions)
4044 [ # # # # ]: 0 : throw reportable_exception("exceptions encountered during archive scan");
4045 [ + + ]: 394 : }
4046 : :
4047 : :
4048 : :
4049 : : // scan for archive files such as .rpm
4050 : : static void
4051 : 714 : scan_archive_file (const string& rps, const stat_t& st,
4052 : : sqlite_ps& ps_upsert_buildids,
4053 : : sqlite_ps& ps_upsert_fileparts,
4054 : : sqlite_ps& ps_upsert_file,
4055 : : sqlite_ps& ps_lookup_file,
4056 : : sqlite_ps& ps_upsert_de,
4057 : : sqlite_ps& ps_upsert_sref,
4058 : : sqlite_ps& ps_upsert_sdef,
4059 : : sqlite_ps& ps_query,
4060 : : sqlite_ps& ps_scan_done,
4061 : : unsigned& fts_cached,
4062 : : unsigned& fts_executable,
4063 : : unsigned& fts_debuginfo,
4064 : : unsigned& fts_sref,
4065 : : unsigned& fts_sdef)
4066 : : {
4067 : : // intern the archive file name
4068 : 714 : int64_t archiveid = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, rps);
4069 : :
4070 : : /* See if we know of it already. */
4071 : 714 : int rc = ps_query
4072 : 714 : .reset()
4073 : 714 : .bind(1, archiveid)
4074 : 714 : .bind(2, st.st_mtime)
4075 : 714 : .step();
4076 : 714 : ps_query.reset();
4077 [ + + ]: 714 : if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
4078 : : // no need to recheck a file/version we already know
4079 : : // specifically, no need to parse this archive again, since we already have
4080 : : // it as a D or E or S record,
4081 : : // (so is stored with buildid=NULL)
4082 : : {
4083 : 340 : fts_cached ++;
4084 : 340 : return;
4085 : : }
4086 : :
4087 : : // extract the archive contents
4088 : 374 : unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0;
4089 : 374 : bool my_fts_sref_complete_p = true;
4090 : 374 : bool any_exceptions = false;
4091 : 374 : try
4092 : : {
4093 [ + - ]: 374 : string archive_extension;
4094 : 374 : archive_classify (rps, archive_extension, archiveid,
4095 : : ps_upsert_buildids, ps_upsert_fileparts, ps_upsert_file, ps_lookup_file,
4096 : : ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, // dalt
4097 [ + - ]: 374 : st.st_mtime,
4098 : : my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
4099 : : my_fts_sref_complete_p);
4100 [ + - + - : 748 : add_metric ("scanned_bytes_total","source",archive_extension + " archive",
+ - - + +
+ - - -
- ]
4101 [ + - ]: 374 : st.st_size);
4102 [ + - + - : 748 : inc_metric ("scanned_files_total","source",archive_extension + " archive");
+ - + - -
+ + + - -
- - ]
4103 [ + - + - : 748 : add_metric("found_debuginfo_total","source",archive_extension + " archive",
+ - + - -
+ + + - -
- - ]
4104 : : my_fts_debuginfo);
4105 [ + - + - : 748 : add_metric("found_executable_total","source",archive_extension + " archive",
+ - + - -
+ + + - -
- - ]
4106 : : my_fts_executable);
4107 [ + - + - : 764 : add_metric("found_sourcerefs_total","source",archive_extension + " archive",
+ - + - -
+ + + - +
- - - - -
- ]
4108 : : my_fts_sref);
4109 : 374 : }
4110 [ - - ]: 0 : catch (const reportable_exception& e)
4111 : : {
4112 [ - - ]: 0 : e.report(clog);
4113 : 0 : any_exceptions = true;
4114 : 0 : }
4115 : :
4116 [ + - ]: 374 : if (verbose > 2)
4117 [ + - ]: 1122 : obatched(clog) << "scanned archive=" << rps
4118 [ + - + - ]: 374 : << " mtime=" << st.st_mtime
4119 [ + - ]: 374 : << " executables=" << my_fts_executable
4120 [ + - + - ]: 374 : << " debuginfos=" << my_fts_debuginfo
4121 [ + - + - ]: 374 : << " srefs=" << my_fts_sref
4122 [ + - + - ]: 374 : << " sdefs=" << my_fts_sdef
4123 [ + - + - : 374 : << " exceptions=" << any_exceptions
+ - + - ]
4124 : 374 : << endl;
4125 : :
4126 : 374 : fts_executable += my_fts_executable;
4127 : 374 : fts_debuginfo += my_fts_debuginfo;
4128 : 374 : fts_sref += my_fts_sref;
4129 : 374 : fts_sdef += my_fts_sdef;
4130 : :
4131 [ - + ]: 374 : if (any_exceptions)
4132 [ # # # # ]: 0 : throw reportable_exception("exceptions encountered during archive scan");
4133 : :
4134 [ + - ]: 374 : if (my_fts_sref_complete_p) // leave incomplete?
4135 : 374 : ps_scan_done
4136 : 374 : .reset()
4137 : 374 : .bind(1, archiveid)
4138 : 374 : .bind(2, st.st_mtime)
4139 : 374 : .bind(3, st.st_size)
4140 : 374 : .step_ok_done();
4141 : : }
4142 : :
4143 : :
4144 : :
4145 : : ////////////////////////////////////////////////////////////////////////
4146 : :
4147 : :
4148 : :
4149 : : // The thread that consumes file names off of the scanq. We hold
4150 : : // the persistent sqlite_ps's at this level and delegate file/archive
4151 : : // scanning to other functions.
4152 : : static void
4153 : 272 : scan ()
4154 : : {
4155 : : // all the prepared statements fit to use, the _f_ set:
4156 [ + - + - : 544 : sqlite_ps ps_f_upsert_buildids (db, "file-buildids-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
+ - - - ]
4157 [ + - + - : 544 : sqlite_ps ps_f_upsert_fileparts (db, "file-fileparts-intern", "insert or ignore into " BUILDIDS "_fileparts VALUES (NULL, ?);");
+ - + - -
- ]
4158 [ + - ]: 272 : sqlite_ps ps_f_upsert_file (db, "file-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, \n"
4159 : : "(select id from " BUILDIDS "_fileparts where name = ?),\n"
4160 [ + - + - : 544 : "(select id from " BUILDIDS "_fileparts where name = ?));");
+ - + - -
- ]
4161 [ + - ]: 272 : sqlite_ps ps_f_lookup_file (db, "file-file-lookup",
4162 : : "select f.id\n"
4163 : : " from " BUILDIDS "_files f, " BUILDIDS "_fileparts p1, " BUILDIDS "_fileparts p2 \n"
4164 [ + - + - : 544 : " where f.dirname = p1.id and f.basename = p2.id and p1.name = ? and p2.name = ?;\n");
+ - + - -
- ]
4165 [ + - ]: 272 : sqlite_ps ps_f_upsert_de (db, "file-de-upsert",
4166 : : "insert or ignore into " BUILDIDS "_f_de "
4167 : : "(buildid, debuginfo_p, executable_p, file, mtime) "
4168 : : "values ((select id from " BUILDIDS "_buildids where hex = ?),"
4169 [ + - + - : 544 : " ?,?,?,?);");
+ - + - -
- ]
4170 [ + - ]: 272 : sqlite_ps ps_f_upsert_s (db, "file-s-upsert",
4171 : : "insert or ignore into " BUILDIDS "_f_s "
4172 : : "(buildid, artifactsrc, file, mtime) "
4173 : : "values ((select id from " BUILDIDS "_buildids where hex = ?),"
4174 [ + - + - : 544 : " ?,?,?);");
+ - + - -
- ]
4175 [ + - ]: 272 : sqlite_ps ps_f_query (db, "file-negativehit-find",
4176 : : "select 1 from " BUILDIDS "_file_mtime_scanned where sourcetype = 'F' "
4177 [ + - + - : 544 : "and file = ? and mtime = ?;");
+ - + - -
- ]
4178 [ + - ]: 272 : sqlite_ps ps_f_scan_done (db, "file-scanned",
4179 : : "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
4180 [ + - + - : 544 : "values ('F', ?,?,?);");
+ - + - -
- ]
4181 : :
4182 : : // and now for the _r_ set
4183 [ + - + - : 544 : sqlite_ps ps_r_upsert_buildids (db, "rpm-buildid-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
+ - + - -
- ]
4184 [ + - + - : 544 : sqlite_ps ps_r_upsert_fileparts (db, "rpm-fileparts-intern", "insert or ignore into " BUILDIDS "_fileparts VALUES (NULL, ?);");
+ - + - -
- ]
4185 [ + - ]: 272 : sqlite_ps ps_r_upsert_file (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, \n"
4186 : : "(select id from " BUILDIDS "_fileparts where name = ?),\n"
4187 [ + - + - : 544 : "(select id from " BUILDIDS "_fileparts where name = ?));");
+ - + - -
- ]
4188 [ + - ]: 272 : sqlite_ps ps_r_lookup_file (db, "rpm-file-lookup",
4189 : : "select f.id\n"
4190 : : " from " BUILDIDS "_files f, " BUILDIDS "_fileparts p1, " BUILDIDS "_fileparts p2 \n"
4191 [ + - + - : 544 : " where f.dirname = p1.id and f.basename = p2.id and p1.name = ? and p2.name = ?;\n");
+ - + - -
- ]
4192 [ + - ]: 272 : sqlite_ps ps_r_upsert_de (db, "rpm-de-insert",
4193 : : "insert or ignore into " BUILDIDS "_r_de (buildid, debuginfo_p, executable_p, file, mtime, content) values ("
4194 [ + - + - : 544 : "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, ?, ?, ?);");
+ - + - -
- ]
4195 [ + - ]: 272 : sqlite_ps ps_r_upsert_sref (db, "rpm-sref-insert",
4196 : : "insert or ignore into " BUILDIDS "_r_sref (buildid, artifactsrc) values ("
4197 : : "(select id from " BUILDIDS "_buildids where hex = ?), "
4198 [ + - + - : 544 : "?);");
+ - + - -
- ]
4199 [ + - ]: 272 : sqlite_ps ps_r_upsert_sdef (db, "rpm-sdef-insert",
4200 : : "insert or ignore into " BUILDIDS "_r_sdef (file, mtime, content) values ("
4201 [ + - + - : 544 : "?, ?, ?);");
+ - + - -
- ]
4202 [ + - ]: 272 : sqlite_ps ps_r_query (db, "rpm-negativehit-query",
4203 : : "select 1 from " BUILDIDS "_file_mtime_scanned where "
4204 [ + - + - : 544 : "sourcetype = 'R' and file = ? and mtime = ?;");
+ - + - -
- ]
4205 [ + - ]: 272 : sqlite_ps ps_r_scan_done (db, "rpm-scanned",
4206 : : "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
4207 [ + - + - : 544 : "values ('R', ?, ?, ?);");
+ - + - -
- ]
4208 : :
4209 : :
4210 : 272 : unsigned fts_cached = 0, fts_executable = 0, fts_debuginfo = 0, fts_sourcefiles = 0;
4211 : 272 : unsigned fts_sref = 0, fts_sdef = 0;
4212 : :
4213 [ + - + - : 544 : add_metric("thread_count", "role", "scan", 1);
+ - + - -
+ - + - -
- - ]
4214 [ + - + - : 544 : add_metric("thread_busy", "role", "scan", 1);
+ - + - -
+ - + - -
- - ]
4215 [ + + ]: 1496 : while (! interrupted)
4216 : : {
4217 [ + - ]: 1224 : scan_payload p;
4218 : :
4219 [ + - + - : 2448 : add_metric("thread_busy", "role", "scan", -1);
+ - + - -
+ - + - -
- - ]
4220 : : // NB: threads may be blocked within either of these two waiting
4221 : : // states, if the work queue happens to run dry. That's OK.
4222 [ + - + - ]: 1224 : if (scan_barrier) scan_barrier->count();
4223 [ + - ]: 1224 : bool gotone = scanq.wait_front(p);
4224 [ + - + - : 2447 : add_metric("thread_busy", "role", "scan", 1);
+ - + - -
+ - + - -
- - ]
4225 : :
4226 [ + + - + ]: 1224 : if (! gotone) continue; // go back to waiting
4227 : :
4228 : 952 : try
4229 : : {
4230 : 952 : bool scan_archive = false;
4231 [ + + ]: 2271 : for (auto&& arch : scan_archives)
4232 [ + + ]: 1319 : if (string_endswith(p.first, arch.first))
4233 : 713 : scan_archive = true;
4234 : :
4235 [ + + ]: 952 : if (scan_archive)
4236 [ + - ]: 714 : scan_archive_file (p.first, p.second,
4237 : : ps_r_upsert_buildids,
4238 : : ps_r_upsert_fileparts,
4239 : : ps_r_upsert_file,
4240 : : ps_r_lookup_file,
4241 : : ps_r_upsert_de,
4242 : : ps_r_upsert_sref,
4243 : : ps_r_upsert_sdef,
4244 : : ps_r_query,
4245 : : ps_r_scan_done,
4246 : : fts_cached,
4247 : : fts_executable,
4248 : : fts_debuginfo,
4249 : : fts_sref,
4250 : : fts_sdef);
4251 : :
4252 [ + + ]: 952 : if (scan_files) // NB: maybe "else if" ?
4253 [ + - ]: 728 : scan_source_file (p.first, p.second,
4254 : : ps_f_upsert_buildids,
4255 : : ps_f_upsert_fileparts,
4256 : : ps_f_upsert_file,
4257 : : ps_f_lookup_file,
4258 : : ps_f_upsert_de,
4259 : : ps_f_upsert_s,
4260 : : ps_f_query,
4261 : : ps_f_scan_done,
4262 : : fts_cached, fts_executable, fts_debuginfo, fts_sourcefiles);
4263 : : }
4264 [ - - ]: 0 : catch (const reportable_exception& e)
4265 : : {
4266 [ - - ]: 0 : e.report(cerr);
4267 : 0 : }
4268 : :
4269 [ + - ]: 952 : scanq.done_front(); // let idlers run
4270 : :
4271 : 952 : if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef)
4272 : : {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed
4273 [ + - + - ]: 952 : (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
4274 [ + - + - ]: 952 : (void) statfs_free_enough_p(tmpdir, "tmpdir"); // this too, in case of fdcache/tmpfile usage
4275 : :
4276 : : // finished a scanning step -- not a "loop", because we just
4277 : : // consume the traversal loop's work, whenever
4278 [ + - + - : 1904 : inc_metric("thread_work_total","role","scan");
+ - + - -
+ - + + -
- - - - -
- ]
4279 : 1224 : }
4280 : :
4281 [ + - + - : 544 : add_metric("thread_busy", "role", "scan", -1);
+ - + - -
+ - + - -
- - ]
4282 : 272 : }
4283 : :
4284 : :
4285 : : // Use this function as the thread entry point, so it can catch our
4286 : : // fleet of exceptions (incl. the sqlite_ps ctors) and report.
4287 : : static void*
4288 : 272 : thread_main_scanner (void* arg)
4289 : : {
4290 : 272 : (void) arg;
4291 [ + + ]: 816 : while (! interrupted)
4292 : 272 : try
4293 : : {
4294 [ + - ]: 272 : scan();
4295 : : }
4296 [ - - ]: 0 : catch (const reportable_exception& e)
4297 : : {
4298 [ - - ]: 0 : e.report(cerr);
4299 : 0 : }
4300 : 272 : return 0;
4301 : : }
4302 : :
4303 : :
4304 : :
4305 : : // The thread that traverses all the source_paths and enqueues all the
4306 : : // matching files into the file/archive scan queue.
4307 : : static void
4308 : 122 : scan_source_paths()
4309 : : {
4310 : : // NB: fedora 31 glibc/fts(3) crashes inside fts_read() on empty
4311 : : // path list.
4312 [ + + ]: 122 : if (source_paths.empty())
4313 : 2 : return;
4314 : :
4315 : : // Turn the source_paths into an fts(3)-compatible char**. Since
4316 : : // source_paths[] does not change after argv processing, the
4317 : : // c_str()'s are safe to keep around awile.
4318 : 120 : vector<const char *> sps;
4319 [ + + ]: 322 : for (auto&& sp: source_paths)
4320 [ + - ]: 202 : sps.push_back(sp.c_str());
4321 [ + - - - ]: 120 : sps.push_back(NULL);
4322 : :
4323 [ + + + - ]: 226 : FTS *fts = fts_open ((char * const *)sps.data(),
4324 : : (traverse_logical ? FTS_LOGICAL : FTS_PHYSICAL|FTS_XDEV)
4325 : : | FTS_NOCHDIR /* multithreaded */,
4326 : : NULL);
4327 [ - + ]: 120 : if (fts == NULL)
4328 [ # # # # ]: 0 : throw libc_exception(errno, "cannot fts_open");
4329 : 120 : defer_dtor<FTS*,int> fts_cleanup (fts, fts_close);
4330 : :
4331 : 120 : struct timespec ts_start, ts_end;
4332 : 120 : clock_gettime (CLOCK_MONOTONIC, &ts_start);
4333 : 120 : unsigned fts_scanned = 0, fts_regex = 0;
4334 : :
4335 : 120 : FTSENT *f;
4336 [ + - + + ]: 2148 : while ((f = fts_read (fts)) != NULL)
4337 : : {
4338 [ + - ]: 1908 : if (interrupted) break;
4339 : :
4340 [ - + ]: 1908 : if (sigusr2 != forced_groom_count) // stop early if groom triggered
4341 : : {
4342 [ # # ]: 0 : scanq.clear(); // clear previously issued work for scanner threads
4343 : : break;
4344 : : }
4345 : :
4346 : 1908 : fts_scanned ++;
4347 : :
4348 [ + - ]: 1908 : if (verbose > 2)
4349 [ + - + - : 3816 : obatched(clog) << "fts traversing " << f->fts_path << endl;
+ - ]
4350 : :
4351 [ + + + + : 1908 : switch (f->fts_info)
+ ]
4352 : : {
4353 : 1040 : case FTS_F:
4354 : 1040 : {
4355 : : /* Found a file. Convert it to an absolute path, so
4356 : : the buildid database does not have relative path
4357 : : names that are unresolvable from a subsequent run
4358 : : in a different cwd. */
4359 [ + - ]: 1040 : char *rp = realpath(f->fts_path, NULL);
4360 [ - + ]: 1040 : if (rp == NULL)
4361 : 0 : continue; // ignore dangling symlink or such
4362 [ + - ]: 1040 : string rps = string(rp);
4363 : 1040 : free (rp);
4364 : :
4365 [ + - ]: 1040 : bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
4366 [ + - ]: 1040 : bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
4367 [ + + ]: 1040 : if (!ri || rx)
4368 : : {
4369 [ + - ]: 88 : if (verbose > 3)
4370 [ + - ]: 176 : obatched(clog) << "fts skipped by regex "
4371 [ + + + - : 96 : << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
+ + + - +
- ]
4372 : 88 : fts_regex ++;
4373 [ + + ]: 88 : if (!ri)
4374 [ + - + - : 8 : inc_metric("traversed_total","type","file-skipped-I");
+ - + - -
+ - + - -
- - ]
4375 [ + + ]: 88 : if (rx)
4376 [ + - + - : 168 : inc_metric("traversed_total","type","file-skipped-X");
+ - + - -
+ - + - -
- - ]
4377 : : }
4378 : : else
4379 : : {
4380 [ + - + - ]: 952 : scanq.push_back (make_pair(rps, *f->fts_statp));
4381 [ + - + - : 1904 : inc_metric("traversed_total","type","file");
+ - + - -
+ - + - -
- - - - ]
4382 : : }
4383 : 0 : }
4384 : 1040 : break;
4385 : :
4386 : 4 : case FTS_ERR:
4387 : 4 : case FTS_NS:
4388 : : // report on some types of errors because they may reflect fixable misconfiguration
4389 : 4 : {
4390 [ + - + - : 8 : auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path));
+ - + - -
+ - + -
- ]
4391 [ + - ]: 4 : x.report(cerr);
4392 : 0 : }
4393 [ + - + - : 8 : inc_metric("traversed_total","type","error");
+ - + - -
+ - + - -
- - ]
4394 : 4 : break;
4395 : :
4396 : 12 : case FTS_SL: // ignore, but count because debuginfod -L would traverse these
4397 [ + - + - : 24 : inc_metric("traversed_total","type","symlink");
+ - + - -
+ - + - -
- - ]
4398 : 12 : break;
4399 : :
4400 : 426 : case FTS_D: // ignore
4401 [ + - + - : 852 : inc_metric("traversed_total","type","directory");
+ - + - -
+ - + - -
- - ]
4402 : 426 : break;
4403 : :
4404 : 426 : default: // ignore
4405 [ + - + - : 852 : inc_metric("traversed_total","type","other");
+ - + - -
+ - + - -
- - ]
4406 : 426 : break;
4407 : : }
4408 : : }
4409 : 120 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
4410 : 120 : double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
4411 : :
4412 [ + - + - : 360 : obatched(clog) << "fts traversed source paths in " << deltas << "s, scanned=" << fts_scanned
+ - + - ]
4413 [ + - + - : 120 : << ", regex-skipped=" << fts_regex << endl;
+ - ]
4414 [ + - ]: 240 : }
4415 : :
4416 : :
4417 : : static void*
4418 : 68 : thread_main_fts_source_paths (void* arg)
4419 : : {
4420 : 68 : (void) arg; // ignore; we operate on global data
4421 : :
4422 [ + - + - : 136 : set_metric("thread_tid", "role","traverse", tid());
+ - - + -
+ - - -
- ]
4423 [ + - + - : 136 : add_metric("thread_count", "role", "traverse", 1);
+ - - + -
+ - - -
- ]
4424 : :
4425 : 68 : time_t last_rescan = 0;
4426 : :
4427 [ + - ]: 284 : while (! interrupted)
4428 : : {
4429 : 284 : sleep (1);
4430 : 284 : scanq.wait_idle(); // don't start a new traversal while scanners haven't finished the job
4431 : 284 : scanq.done_idle(); // release the hounds
4432 [ + + ]: 284 : if (interrupted) break;
4433 : :
4434 : 216 : time_t now = time(NULL);
4435 : 216 : bool rescan_now = false;
4436 [ + + ]: 216 : if (last_rescan == 0) // at least one initial rescan is documented even for -t0
4437 : 66 : rescan_now = true;
4438 [ + + + + ]: 216 : if (rescan_s > 0 && (long)now > (long)(last_rescan + rescan_s))
4439 : 216 : rescan_now = true;
4440 [ + + ]: 216 : if (sigusr1 != forced_rescan_count)
4441 : : {
4442 : 60 : forced_rescan_count = sigusr1;
4443 : 60 : rescan_now = true;
4444 : : }
4445 [ + + ]: 216 : if (rescan_now)
4446 : : {
4447 [ + - + - : 244 : set_metric("thread_busy", "role","traverse", 1);
+ - - + -
+ - - -
- ]
4448 : 122 : try
4449 : : {
4450 [ + - ]: 122 : scan_source_paths();
4451 : : }
4452 [ - - ]: 0 : catch (const reportable_exception& e)
4453 : : {
4454 [ - - ]: 0 : e.report(cerr);
4455 : 0 : }
4456 : 122 : last_rescan = time(NULL); // NB: now was before scanning
4457 : : // finished a traversal loop
4458 [ + - + - : 244 : inc_metric("thread_work_total", "role","traverse");
+ - - + -
+ - - -
- ]
4459 [ + - + - : 244 : set_metric("thread_busy", "role","traverse", 0);
+ - - + -
+ - - -
- ]
4460 : : }
4461 : : }
4462 : :
4463 : 68 : return 0;
4464 : : }
4465 : :
4466 : :
4467 : :
4468 : : ////////////////////////////////////////////////////////////////////////
4469 : :
4470 : : static void
4471 : 74 : database_stats_report()
4472 : : {
4473 : 74 : sqlite_ps ps_query (db, "database-overview",
4474 [ + - + - : 148 : "select label,quantity from " BUILDIDS "_stats");
+ - - - ]
4475 : :
4476 [ + - + - ]: 148 : obatched(clog) << "database record counts:" << endl;
4477 : 1702 : while (1)
4478 : : {
4479 [ + - ]: 888 : if (interrupted) break;
4480 [ + - ]: 888 : if (sigusr1 != forced_rescan_count) // stop early if scan triggered
4481 : : break;
4482 : :
4483 [ + - ]: 888 : int rc = ps_query.step();
4484 [ + + ]: 888 : if (rc == SQLITE_DONE) break;
4485 [ - + ]: 814 : if (rc != SQLITE_ROW)
4486 [ # # # # ]: 0 : throw sqlite_exception(rc, "step");
4487 : :
4488 [ + - ]: 814 : obatched(clog)
4489 [ + - - + : 814 : << ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL")
+ - ]
4490 : : << " "
4491 [ + - + - : 1628 : << (sqlite3_column_text(ps_query, 1) ?: (const unsigned char*) "NULL")
- + + - ]
4492 : 814 : << endl;
4493 : :
4494 [ + - + - : 1628 : set_metric("groom", "statistic",
- + + - +
- + - + -
- + + + -
- - - ]
4495 [ + - ]: 814 : ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL"),
4496 : : (sqlite3_column_double(ps_query, 1)));
4497 : 814 : }
4498 : 74 : }
4499 : :
4500 : :
4501 : : // Do a round of database grooming that might take many minutes to run.
4502 : 74 : void groom()
4503 : : {
4504 [ + - ]: 148 : obatched(clog) << "grooming database" << endl;
4505 : :
4506 : 74 : struct timespec ts_start, ts_end;
4507 : 74 : clock_gettime (CLOCK_MONOTONIC, &ts_start);
4508 : :
4509 : : // scan for files that have disappeared
4510 : 74 : sqlite_ps files (db, "check old files",
4511 : : "select distinct s.mtime, s.file, f.name from "
4512 : : BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files_v f "
4513 [ + - + - : 148 : "where f.id = s.file");
+ - - - ]
4514 : : // NB: Because _ftime_mtime_scanned can contain both F and
4515 : : // R records for the same file, this query would return duplicates if the
4516 : : // DISTINCT qualifier were not there.
4517 [ + - ]: 74 : files.reset();
4518 : :
4519 : : // DECISION TIME - we enumerate stale fileids/mtimes
4520 [ + - ]: 74 : deque<pair<int64_t,int64_t> > stale_fileid_mtime;
4521 : :
4522 : 74 : time_t time_start = time(NULL);
4523 : 306 : while(1)
4524 : : {
4525 : : // PR28514: limit grooming iteration to O(rescan time), to avoid
4526 : : // slow filesystem tests over many files locking out rescans for
4527 : : // too long.
4528 [ + + - + ]: 190 : if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
4529 : : {
4530 [ # # # # : 0 : inc_metric("groomed_total", "decision", "aborted");
# # # # #
# # # # #
# # ]
4531 : 0 : break;
4532 : : }
4533 : :
4534 [ + - ]: 190 : if (interrupted) break;
4535 : :
4536 [ + - ]: 190 : int rc = files.step();
4537 [ + + ]: 190 : if (rc != SQLITE_ROW)
4538 : : break;
4539 : :
4540 [ + - ]: 116 : int64_t mtime = sqlite3_column_int64 (files, 0);
4541 [ + - ]: 116 : int64_t fileid = sqlite3_column_int64 (files, 1);
4542 [ + - - + ]: 116 : const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: "");
4543 : 116 : struct stat s;
4544 : 116 : bool regex_file_drop = 0;
4545 : :
4546 [ + + ]: 116 : if (regex_groom)
4547 : : {
4548 [ + - ]: 16 : bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0);
4549 [ + - ]: 16 : bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0);
4550 : 16 : regex_file_drop = !reg_include || reg_exclude; // match logic of scan_source_paths
4551 : : }
4552 : :
4553 : 116 : rc = stat(filename, &s);
4554 [ + + - + ]: 116 : if ( regex_file_drop || rc < 0 || (mtime != (int64_t) s.st_mtime) )
4555 : : {
4556 [ + - ]: 24 : if (verbose > 2)
4557 [ + - + - : 48 : obatched(clog) << "groom: stale file=" << filename << " mtime=" << mtime << endl;
+ - + - +
- ]
4558 [ + - ]: 24 : stale_fileid_mtime.push_back(make_pair(fileid,mtime));
4559 [ + - + - : 48 : inc_metric("groomed_total", "decision", "stale");
+ - + - -
+ - + - -
- - ]
4560 [ + - + - : 48 : set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+ - + - -
+ - + - -
- - ]
4561 : : }
4562 : : else
4563 [ + - + - : 184 : inc_metric("groomed_total", "decision", "fresh");
+ - + - -
+ - + - -
- - ]
4564 : :
4565 [ + - ]: 116 : if (sigusr1 != forced_rescan_count) // stop early if scan triggered
4566 : : break;
4567 : 116 : }
4568 [ + - ]: 74 : files.reset();
4569 : :
4570 : : // ACTION TIME
4571 : :
4572 : : // Now that we know which file/mtime tuples are stale, actually do
4573 : : // the deletion from the database. Doing this during the SELECT
4574 : : // iteration above results in undefined behaviour in sqlite, as per
4575 : : // https://www.sqlite.org/isolation.html
4576 : :
4577 : : // We could shuffle stale_fileid_mtime[] here. It'd let aborted
4578 : : // sequences of nuke operations resume at random locations, instead
4579 : : // of just starting over. But it doesn't matter much either way,
4580 : : // as long as we make progress.
4581 : :
4582 [ + - + - : 148 : sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
+ - + - -
- ]
4583 [ + - + - : 148 : sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
+ - + - -
- ]
4584 [ + - ]: 74 : sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
4585 [ + - + - : 148 : "where file = ? and mtime = ?");
+ - + - -
- ]
4586 : :
4587 [ + + ]: 98 : while (! stale_fileid_mtime.empty())
4588 : : {
4589 : 24 : auto stale = stale_fileid_mtime.front();
4590 : 24 : stale_fileid_mtime.pop_front();
4591 [ + - + - : 48 : set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+ - + - -
+ - + - -
- - ]
4592 : :
4593 : : // PR28514: limit grooming iteration to O(rescan time), to avoid
4594 : : // slow nuke_* queries over many files locking out rescans for too
4595 : : // long. We iterate over the files in random() sequence to avoid
4596 : : // partial checks going over the same set.
4597 [ - + - - ]: 24 : if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
4598 : : {
4599 [ # # # # : 0 : inc_metric("groomed_total", "action", "aborted");
# # # # #
# # # # #
# # ]
4600 : 0 : break;
4601 : : }
4602 : :
4603 [ + - ]: 24 : if (interrupted) break;
4604 : :
4605 : 24 : int64_t fileid = stale.first;
4606 : 24 : int64_t mtime = stale.second;
4607 [ + - + - : 24 : files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ - + - ]
4608 [ + - + - : 24 : files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ - + - ]
4609 [ + - + - : 24 : files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ - + - ]
4610 [ + - + - : 48 : inc_metric("groomed_total", "action", "cleaned");
+ - + - -
+ - + - -
- - ]
4611 : :
4612 [ + - ]: 24 : if (sigusr1 != forced_rescan_count) // stop early if scan triggered
4613 : : break;
4614 : : }
4615 : 74 : stale_fileid_mtime.clear(); // no need for this any longer
4616 [ + - + - : 148 : set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+ - + - -
+ - + - -
- - ]
4617 : :
4618 : : // delete buildids with no references in _r_de or _f_de tables;
4619 : : // cascades to _r_sref & _f_s records
4620 [ + - ]: 74 : sqlite_ps buildids_del (db, "nuke orphan buildids",
4621 : : "delete from " BUILDIDS "_buildids "
4622 : : "where not exists (select 1 from " BUILDIDS "_f_de d where " BUILDIDS "_buildids.id = d.buildid) "
4623 [ + - + - : 148 : "and not exists (select 1 from " BUILDIDS "_r_de d where " BUILDIDS "_buildids.id = d.buildid)");
+ - + - -
- ]
4624 [ + - + - ]: 74 : buildids_del.reset().step_ok_done();
4625 : :
4626 [ - + ]: 74 : if (interrupted) return;
4627 : :
4628 : : // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
4629 [ + - + - : 222 : { sqlite_ps g (db, "incremental vacuum", "pragma incremental_vacuum"); g.reset().step_ok_done(); }
+ - + - +
- + - -
- ]
4630 : : // https://www.sqlite.org/lang_analyze.html#approx
4631 [ + - + - : 222 : { sqlite_ps g (db, "analyze setup", "pragma analysis_limit = 1000;\n"); g.reset().step_ok_done(); }
+ - + - +
- + - -
- ]
4632 [ + - + - : 148 : { sqlite_ps g (db, "analyze", "analyze"); g.reset().step_ok_done(); }
+ - - + +
- + - -
- ]
4633 [ + - + - : 222 : { sqlite_ps g (db, "analyze reload", "analyze sqlite_schema"); g.reset().step_ok_done(); }
+ - + - +
- + - -
- ]
4634 [ + - + - : 148 : { sqlite_ps g (db, "optimize", "pragma optimize"); g.reset().step_ok_done(); }
+ - - + +
- + - -
- ]
4635 [ + - + - : 222 : { sqlite_ps g (db, "wal checkpoint", "pragma wal_checkpoint=truncate"); g.reset().step_ok_done(); }
+ - + - +
- + - -
- ]
4636 : :
4637 [ + - ]: 74 : database_stats_report();
4638 : :
4639 [ + - + - ]: 74 : (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
4640 : :
4641 [ + - ]: 74 : sqlite3_db_release_memory(db); // shrink the process if possible
4642 [ + - ]: 74 : sqlite3_db_release_memory(dbq); // ... for both connections
4643 [ + - ]: 74 : debuginfod_pool_groom(); // and release any debuginfod_client objects we've been holding onto
4644 : : #if HAVE_MALLOC_TRIM
4645 : 74 : malloc_trim(0); // PR31103: release memory allocated for temporary purposes
4646 : : #endif
4647 : :
4648 : : #if 0 /* PR31265: don't jettison cache unnecessarily */
4649 : : fdcache.limit(0); // release the fdcache contents
4650 : : fdcache.limit(fdcache_mbs); // restore status quo parameters
4651 : : #endif
4652 : :
4653 : 74 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
4654 : 74 : double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
4655 : :
4656 [ + - + - : 148 : obatched(clog) << "groomed database in " << deltas << "s" << endl;
+ - + - ]
4657 : 74 : }
4658 : :
4659 : :
4660 : : static void*
4661 : 74 : thread_main_groom (void* /*arg*/)
4662 : : {
4663 [ + - + - : 148 : set_metric("thread_tid", "role", "groom", tid());
+ - - + -
+ - - -
- ]
4664 [ + - + - : 148 : add_metric("thread_count", "role", "groom", 1);
+ - - + -
+ - - -
- ]
4665 : :
4666 : 74 : time_t last_groom = 0;
4667 : :
4668 : 510 : while (1)
4669 : : {
4670 : 292 : sleep (1);
4671 : 292 : scanq.wait_idle(); // PR25394: block scanners during grooming!
4672 [ + + ]: 292 : if (interrupted) break;
4673 : :
4674 : 218 : time_t now = time(NULL);
4675 : 218 : bool groom_now = false;
4676 [ + + ]: 218 : if (last_groom == 0) // at least one initial groom is documented even for -g0
4677 : 68 : groom_now = true;
4678 [ + + + + ]: 218 : if (groom_s > 0 && (long)now > (long)(last_groom + groom_s))
4679 : 218 : groom_now = true;
4680 [ + + ]: 218 : if (sigusr2 != forced_groom_count)
4681 : : {
4682 : 6 : forced_groom_count = sigusr2;
4683 : 6 : groom_now = true;
4684 : : }
4685 [ + + ]: 218 : if (groom_now)
4686 : : {
4687 [ + - + - : 148 : set_metric("thread_busy", "role", "groom", 1);
+ - - + -
+ - - -
- ]
4688 : 74 : try
4689 : : {
4690 [ + - ]: 74 : groom ();
4691 : : }
4692 [ - - ]: 0 : catch (const sqlite_exception& e)
4693 : : {
4694 [ - - - - : 0 : obatched(cerr) << e.message << endl;
- - ]
4695 : 0 : }
4696 : 74 : last_groom = time(NULL); // NB: now was before grooming
4697 : : // finished a grooming loop
4698 [ + - + - : 148 : inc_metric("thread_work_total", "role", "groom");
+ - - + -
+ - - -
- ]
4699 [ + - + - : 148 : set_metric("thread_busy", "role", "groom", 0);
+ - - + -
+ - - -
- ]
4700 : : }
4701 : :
4702 : 218 : scanq.done_idle();
4703 : 218 : }
4704 : :
4705 : 74 : return 0;
4706 : : }
4707 : :
4708 : :
4709 : : ////////////////////////////////////////////////////////////////////////
4710 : :
4711 : :
4712 : : static void
4713 : 76 : signal_handler (int /* sig */)
4714 : : {
4715 : 76 : interrupted ++;
4716 : :
4717 [ + + ]: 76 : if (db)
4718 : 74 : sqlite3_interrupt (db);
4719 [ + - ]: 76 : if (dbq)
4720 : 76 : sqlite3_interrupt (dbq);
4721 : :
4722 : : // NB: don't do anything else in here
4723 : 76 : }
4724 : :
4725 : : static void
4726 : 60 : sigusr1_handler (int /* sig */)
4727 : : {
4728 : 60 : sigusr1 ++;
4729 : : // NB: don't do anything else in here
4730 : 60 : }
4731 : :
4732 : : static void
4733 : 6 : sigusr2_handler (int /* sig */)
4734 : : {
4735 : 6 : sigusr2 ++;
4736 : : // NB: don't do anything else in here
4737 : 6 : }
4738 : :
4739 : :
4740 : : static void // error logging callback from libmicrohttpd internals
4741 : 0 : error_cb (void *arg, const char *fmt, va_list ap)
4742 : : {
4743 : 0 : (void) arg;
4744 [ # # # # : 0 : inc_metric("error_count","libmicrohttpd",fmt);
# # # # #
# # # #
# ]
4745 : 0 : char errmsg[512];
4746 : 0 : (void) vsnprintf (errmsg, sizeof(errmsg), fmt, ap); // ok if slightly truncated
4747 [ # # ]: 0 : obatched(cerr) << "libmicrohttpd error: " << errmsg; // MHD_DLOG calls already include \n
4748 : 0 : }
4749 : :
4750 : :
4751 : : // A user-defined sqlite function, to score the sharedness of the
4752 : : // prefix of two strings. This is used to compare candidate debuginfo
4753 : : // / source-rpm names, so that the closest match
4754 : : // (directory-topology-wise closest) is found. This is important in
4755 : : // case the same sref (source file name) is in many -debuginfo or
4756 : : // -debugsource RPMs, such as when multiple versions/releases of the
4757 : : // same package are in the database.
4758 : :
4759 : 296 : static void sqlite3_sharedprefix_fn (sqlite3_context* c, int argc, sqlite3_value** argv)
4760 : : {
4761 [ - + ]: 296 : if (argc != 2)
4762 : 0 : sqlite3_result_error(c, "expect 2 string arguments", -1);
4763 [ + - + + ]: 592 : else if ((sqlite3_value_type(argv[0]) != SQLITE_TEXT) ||
4764 : 296 : (sqlite3_value_type(argv[1]) != SQLITE_TEXT))
4765 : 6 : sqlite3_result_null(c);
4766 : : else
4767 : : {
4768 : 290 : const unsigned char* a = sqlite3_value_text (argv[0]);
4769 : 290 : const unsigned char* b = sqlite3_value_text (argv[1]);
4770 : 290 : int i = 0;
4771 [ + + + - : 35878 : while (*a != '\0' && *b != '\0' && *a++ == *b++)
+ + + + ]
4772 : 35048 : i++;
4773 : 290 : sqlite3_result_int (c, i);
4774 : : }
4775 : 296 : }
4776 : :
4777 : :
4778 : : static unsigned
4779 : 148 : default_concurrency() // guaranteed >= 1
4780 : : {
4781 : : // Prior to PR29975 & PR29976, we'd just use this:
4782 : 148 : unsigned sth = std::thread::hardware_concurrency();
4783 : : // ... but on many-CPU boxes, admins or distros may throttle
4784 : : // resources in such a way that debuginfod would mysteriously fail.
4785 : : // So we reduce the defaults:
4786 : :
4787 : 148 : unsigned aff = 0;
4788 : : #ifdef HAVE_SCHED_GETAFFINITY
4789 : 148 : {
4790 : 148 : int ret;
4791 : 148 : cpu_set_t mask;
4792 : 148 : CPU_ZERO(&mask);
4793 : 148 : ret = sched_getaffinity(0, sizeof(mask), &mask);
4794 [ + - ]: 148 : if (ret == 0)
4795 : 148 : aff = CPU_COUNT(&mask);
4796 : : }
4797 : : #endif
4798 : :
4799 : 148 : unsigned fn = 0;
4800 : : #ifdef HAVE_GETRLIMIT
4801 : 148 : {
4802 : 148 : struct rlimit rlim;
4803 : 148 : int rc = getrlimit(RLIMIT_NOFILE, &rlim);
4804 [ + - ]: 148 : if (rc == 0)
4805 [ + - ]: 296 : fn = max((rlim_t)1, (rlim.rlim_cur - 100) / 4);
4806 : : // at least 2 fds are used by each listener thread etc.
4807 : : // plus a bunch to account for shared libraries and such
4808 : : }
4809 : : #endif
4810 : :
4811 [ - + - + : 148 : unsigned d = min(max(sth, 1U),
- + ]
4812 [ - + ]: 148 : min(max(aff, 1U),
4813 [ - + ]: 148 : max(fn, 1U)));
4814 : 148 : return d;
4815 : : }
4816 : :
4817 : :
4818 : : // 30879: Something to help out in case of an uncaught exception.
4819 : 0 : void my_terminate_handler()
4820 : : {
4821 : : #if defined(__GLIBC__)
4822 : 0 : void *array[40];
4823 : 0 : int size = backtrace (array, 40);
4824 : 0 : backtrace_symbols_fd (array, size, STDERR_FILENO);
4825 : : #endif
4826 : : #if defined(__GLIBCXX__) || defined(__GLIBCPP__)
4827 : 0 : __gnu_cxx::__verbose_terminate_handler();
4828 : : #endif
4829 : 0 : abort();
4830 : : }
4831 : :
4832 : :
4833 : : int
4834 : 76 : main (int argc, char *argv[])
4835 : : {
4836 : 76 : (void) setlocale (LC_ALL, "");
4837 : 76 : (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
4838 : 76 : (void) textdomain (PACKAGE_TARNAME);
4839 : :
4840 : 76 : std::set_terminate(& my_terminate_handler);
4841 : :
4842 : : /* Tell the library which version we are expecting. */
4843 : 76 : elf_version (EV_CURRENT);
4844 : :
4845 [ + - - + ]: 152 : tmpdir = string(getenv("TMPDIR") ?: "/tmp");
4846 : :
4847 : : /* Set computed default values. */
4848 [ - + + - : 76 : db_path = string(getenv("HOME") ?: "/") + string("/.debuginfod.sqlite"); /* XDG? */
+ - - + -
+ + - -
- ]
4849 : 76 : int rc = regcomp (& file_include_regex, ".*", REG_EXTENDED|REG_NOSUB); // match everything
4850 [ - + ]: 76 : if (rc != 0)
4851 : 0 : error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
4852 : 76 : rc = regcomp (& file_exclude_regex, "^$", REG_EXTENDED|REG_NOSUB); // match nothing
4853 [ - + ]: 76 : if (rc != 0)
4854 : 0 : error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
4855 : :
4856 : : // default parameters for fdcache are computed from system stats
4857 : 76 : struct statfs sfs;
4858 : 76 : rc = statfs(tmpdir.c_str(), &sfs);
4859 [ - + ]: 76 : if (rc < 0)
4860 : 0 : fdcache_mbs = 1024; // 1 gigabyte
4861 : : else
4862 : 76 : fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
4863 : 76 : fdcache_mintmp = 25; // emergency flush at 25% remaining (75% full)
4864 : 76 : fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
4865 : :
4866 : : /* Parse and process arguments. */
4867 : 76 : int remaining;
4868 : 76 : (void) argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &remaining, NULL);
4869 [ - + ]: 76 : if (remaining != argc)
4870 : 0 : error (EXIT_FAILURE, 0,
4871 : 0 : "unexpected argument: %s", argv[remaining]);
4872 : :
4873 [ + + + + : 76 : if (scan_archives.size()==0 && !scan_files && source_paths.size()>0)
- + ]
4874 [ # # ]: 0 : obatched(clog) << "warning: without -F -R -U -Z, ignoring PATHs" << endl;
4875 : :
4876 : 76 : fdcache.limit(fdcache_mbs);
4877 : :
4878 : 76 : (void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore
4879 : 76 : (void) signal (SIGINT, signal_handler); // ^C
4880 : 76 : (void) signal (SIGHUP, signal_handler); // EOF
4881 : 76 : (void) signal (SIGTERM, signal_handler); // systemd
4882 : 76 : (void) signal (SIGUSR1, sigusr1_handler); // end-user
4883 : 76 : (void) signal (SIGUSR2, sigusr2_handler); // end-user
4884 : :
4885 : : /* Get database ready. */
4886 [ + + ]: 76 : if (! passive_p)
4887 : : {
4888 : 74 : rc = sqlite3_open_v2 (db_path.c_str(), &db, (SQLITE_OPEN_READWRITE
4889 : : |SQLITE_OPEN_URI
4890 : : |SQLITE_OPEN_PRIVATECACHE
4891 : : |SQLITE_OPEN_CREATE
4892 : : |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
4893 : : NULL);
4894 [ - + ]: 74 : if (rc == SQLITE_CORRUPT)
4895 : : {
4896 : 0 : (void) unlink (db_path.c_str());
4897 : 0 : error (EXIT_FAILURE, 0,
4898 : : "cannot open %s, deleted database: %s", db_path.c_str(), sqlite3_errmsg(db));
4899 : : }
4900 [ - + ]: 74 : else if (rc)
4901 : : {
4902 : 0 : error (EXIT_FAILURE, 0,
4903 : : "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(db));
4904 : : }
4905 : : }
4906 : :
4907 : : // open the readonly query variant
4908 : : // NB: PRIVATECACHE allows web queries to operate in parallel with
4909 : : // much other grooming/scanning operation.
4910 : 76 : rc = sqlite3_open_v2 (db_path.c_str(), &dbq, (SQLITE_OPEN_READONLY
4911 : : |SQLITE_OPEN_URI
4912 : : |SQLITE_OPEN_PRIVATECACHE
4913 : : |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
4914 : : NULL);
4915 [ - + ]: 76 : if (rc)
4916 : : {
4917 : 0 : error (EXIT_FAILURE, 0,
4918 : : "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(dbq));
4919 : : }
4920 : :
4921 : :
4922 [ + - ]: 152 : obatched(clog) << "opened database " << db_path
4923 [ + + + - : 78 : << (db?" rw":"") << (dbq?" ro":"") << endl;
- + + - +
- ]
4924 [ + - + - ]: 152 : obatched(clog) << "sqlite version " << sqlite3_version << endl;
4925 [ + + + - : 226 : obatched(clog) << "service mode " << (passive_p ? "passive":"active") << endl;
+ - ]
4926 : :
4927 : : // add special string-prefix-similarity function used in rpm sref/sdef resolution
4928 : 76 : rc = sqlite3_create_function(dbq, "sharedprefix", 2, SQLITE_UTF8, NULL,
4929 : : & sqlite3_sharedprefix_fn, NULL, NULL);
4930 [ - + ]: 76 : if (rc != SQLITE_OK)
4931 : 0 : error (EXIT_FAILURE, 0,
4932 : : "cannot create sharedprefix function: %s", sqlite3_errmsg(dbq));
4933 : :
4934 [ + + ]: 76 : if (! passive_p)
4935 : : {
4936 [ + + ]: 74 : if (verbose > 3)
4937 [ + - + - ]: 88 : obatched(clog) << "ddl: " << DEBUGINFOD_SQLITE_DDL << endl;
4938 : 74 : rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_DDL, NULL, NULL, NULL);
4939 [ - + ]: 74 : if (rc != SQLITE_OK)
4940 : : {
4941 : 0 : error (EXIT_FAILURE, 0,
4942 : : "cannot run database schema ddl: %s", sqlite3_errmsg(db));
4943 : : }
4944 : : }
4945 : :
4946 [ + - + - : 152 : obatched(clog) << "libmicrohttpd version " << MHD_get_version() << endl;
+ - ]
4947 : :
4948 : : /* If '-C' wasn't given or was given with no arg, pick a reasonable default
4949 : : for the number of worker threads. */
4950 [ + + ]: 76 : if (connection_pool == 0)
4951 : 72 : connection_pool = default_concurrency();
4952 : :
4953 : : /* Note that MHD_USE_EPOLL and MHD_USE_THREAD_PER_CONNECTION don't
4954 : : work together. */
4955 : 76 : unsigned int use_epoll = 0;
4956 : : #if MHD_VERSION >= 0x00095100
4957 : 76 : use_epoll = MHD_USE_EPOLL;
4958 : : #endif
4959 : :
4960 : 76 : unsigned int mhd_flags = (
4961 : : #if MHD_VERSION >= 0x00095300
4962 : : MHD_USE_INTERNAL_POLLING_THREAD
4963 : : #else
4964 : : MHD_USE_SELECT_INTERNALLY
4965 : : #endif
4966 : : | MHD_USE_DUAL_STACK
4967 : : | use_epoll
4968 : : #if MHD_VERSION >= 0x00095200
4969 : : | MHD_USE_ITC
4970 : : #endif
4971 : : | MHD_USE_DEBUG); /* report errors to stderr */
4972 : :
4973 : : // Start httpd server threads. Use a single dual-homed pool.
4974 : 76 : MHD_Daemon *d46 = MHD_start_daemon (mhd_flags, http_port,
4975 : : NULL, NULL, /* default accept policy */
4976 : : handler_cb, NULL, /* handler callback */
4977 : : MHD_OPTION_EXTERNAL_LOGGER,
4978 : : error_cb, NULL,
4979 : : MHD_OPTION_THREAD_POOL_SIZE,
4980 : : (int)connection_pool,
4981 : : MHD_OPTION_END);
4982 : :
4983 : 76 : MHD_Daemon *d4 = NULL;
4984 [ - + ]: 76 : if (d46 == NULL)
4985 : : {
4986 : : // Cannot use dual_stack, use ipv4 only
4987 : 0 : mhd_flags &= ~(MHD_USE_DUAL_STACK);
4988 [ # # ]: 0 : d4 = MHD_start_daemon (mhd_flags, http_port,
4989 : : NULL, NULL, /* default accept policy */
4990 : : handler_cb, NULL, /* handler callback */
4991 : : MHD_OPTION_EXTERNAL_LOGGER,
4992 : : error_cb, NULL,
4993 : : (connection_pool
4994 : : ? MHD_OPTION_THREAD_POOL_SIZE
4995 : : : MHD_OPTION_END),
4996 : : (connection_pool
4997 : : ? (int)connection_pool
4998 : : : MHD_OPTION_END),
4999 : : MHD_OPTION_END);
5000 [ # # ]: 0 : if (d4 == NULL)
5001 : : {
5002 : 0 : sqlite3 *database = db;
5003 : 0 : sqlite3 *databaseq = dbq;
5004 : 0 : db = dbq = 0; // for signal_handler not to freak
5005 : 0 : sqlite3_close (databaseq);
5006 : 0 : sqlite3_close (database);
5007 : 0 : error (EXIT_FAILURE, 0, "cannot start http server at port %d",
5008 : : http_port);
5009 : : }
5010 : :
5011 : : }
5012 : 76 : obatched(clog) << "started http server on"
5013 : : << (d4 != NULL ? " IPv4 " : " IPv4 IPv6 ")
5014 [ + - + - : 152 : << "port=" << http_port << endl;
+ - + - +
- ]
5015 : :
5016 : : // add maxigroom sql if -G given
5017 [ - + ]: 76 : if (maxigroom)
5018 : : {
5019 [ # # ]: 0 : obatched(clog) << "maxigrooming database, please wait." << endl;
5020 : : // NB: this index alone can nearly double the database size!
5021 : : // NB: this index would be necessary to run source-file metadata searches fast
5022 [ # # ]: 0 : extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
5023 [ # # ]: 0 : extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
5024 [ # # ]: 0 : extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
5025 : :
5026 : : // NB: we don't maxigroom the _files interning table. It'd require a temp index on all the
5027 : : // tables that have file foreign-keys, which is a lot.
5028 : :
5029 : : // NB: with =delete, may take up 3x disk space total during vacuum process
5030 : : // vs. =off (only 2x but may corrupt database if program dies mid-vacuum)
5031 : : // vs. =wal (>3x observed, but safe)
5032 [ # # ]: 0 : extra_ddl.push_back("pragma journal_mode=delete;");
5033 [ # # ]: 0 : extra_ddl.push_back("vacuum;");
5034 [ # # ]: 0 : extra_ddl.push_back("pragma journal_mode=wal;");
5035 : : }
5036 : :
5037 : : // run extra -D sql if given
5038 [ + + ]: 76 : if (! passive_p)
5039 [ - + ]: 74 : for (auto&& i: extra_ddl)
5040 : : {
5041 [ # # ]: 0 : if (verbose > 1)
5042 [ # # # # ]: 0 : obatched(clog) << "extra ddl:\n" << i << endl;
5043 : 0 : rc = sqlite3_exec (db, i.c_str(), NULL, NULL, NULL);
5044 [ # # # # ]: 0 : if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
5045 : 0 : error (0, 0,
5046 : : "warning: cannot run database extra ddl %s: %s", i.c_str(), sqlite3_errmsg(db));
5047 : :
5048 [ # # ]: 0 : if (maxigroom)
5049 [ # # ]: 0 : obatched(clog) << "maxigroomed database" << endl;
5050 : : }
5051 : :
5052 [ + + ]: 76 : if (! passive_p)
5053 [ + - + - ]: 148 : obatched(clog) << "search concurrency " << concurrency << endl;
5054 : 76 : obatched(clog) << "webapi connection pool " << connection_pool
5055 [ + - - + : 76 : << (connection_pool ? "" : " (unlimited)") << endl;
+ - + - ]
5056 [ + + ]: 76 : if (! passive_p) {
5057 [ + - + - ]: 148 : obatched(clog) << "rescan time " << rescan_s << endl;
5058 [ + - + - ]: 148 : obatched(clog) << "scan checkpoint " << scan_checkpoint << endl;
5059 : : }
5060 [ + - + - ]: 152 : obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
5061 [ + - + - ]: 152 : obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
5062 [ + - + - ]: 152 : obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
5063 [ + - + - ]: 152 : obatched(clog) << "fdcache tmpdir min% " << fdcache_mintmp << endl;
5064 [ + + ]: 76 : if (! passive_p)
5065 [ + - + - ]: 148 : obatched(clog) << "groom time " << groom_s << endl;
5066 [ + - + - ]: 152 : obatched(clog) << "forwarded ttl limit " << forwarded_ttl_limit << endl;
5067 : :
5068 [ + + ]: 76 : if (scan_archives.size()>0)
5069 : : {
5070 : 54 : obatched ob(clog);
5071 [ + - ]: 54 : auto& o = ob << "accepting archive types ";
5072 [ + + ]: 164 : for (auto&& arch : scan_archives)
5073 [ + - + - : 110 : o << arch.first << "(" << arch.second << ") ";
+ - + - ]
5074 [ + - ]: 54 : o << endl;
5075 : 54 : }
5076 : 76 : const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
5077 [ + + + + ]: 76 : if (du && du[0] != '\0') // set to non-empty string?
5078 [ + - + - ]: 32 : obatched(clog) << "upstream debuginfod servers: " << du << endl;
5079 : :
5080 [ + + ]: 76 : vector<pthread_t> all_threads;
5081 : :
5082 [ + + ]: 76 : if (! passive_p)
5083 : : {
5084 : 74 : pthread_t pt;
5085 : 74 : rc = pthread_create (& pt, NULL, thread_main_groom, NULL);
5086 [ - + ]: 74 : if (rc)
5087 : 0 : error (EXIT_FAILURE, rc, "cannot spawn thread to groom database\n");
5088 : : else
5089 : : {
5090 : : #ifdef HAVE_PTHREAD_SETNAME_NP
5091 : 74 : (void) pthread_setname_np (pt, "groom");
5092 : : #endif
5093 [ + - ]: 74 : all_threads.push_back(pt);
5094 : : }
5095 : :
5096 [ + + + + ]: 74 : if (scan_files || scan_archives.size() > 0)
5097 : : {
5098 [ + - ]: 68 : if (scan_checkpoint > 0)
5099 [ + - ]: 68 : scan_barrier = new sqlite_checkpoint_pb(concurrency, (unsigned) scan_checkpoint);
5100 : :
5101 : 68 : rc = pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL);
5102 [ - + ]: 68 : if (rc)
5103 : 0 : error (EXIT_FAILURE, rc, "cannot spawn thread to traverse source paths\n");
5104 : : #ifdef HAVE_PTHREAD_SETNAME_NP
5105 : 68 : (void) pthread_setname_np (pt, "traverse");
5106 : : #endif
5107 [ + - ]: 68 : all_threads.push_back(pt);
5108 : :
5109 [ + + ]: 340 : for (unsigned i=0; i<concurrency; i++)
5110 : : {
5111 : 272 : rc = pthread_create (& pt, NULL, thread_main_scanner, NULL);
5112 [ - + ]: 272 : if (rc)
5113 : 0 : error (EXIT_FAILURE, rc, "cannot spawn thread to scan source files / archives\n");
5114 : : #ifdef HAVE_PTHREAD_SETNAME_NP
5115 : 272 : (void) pthread_setname_np (pt, "scan");
5116 : : #endif
5117 [ + - ]: 272 : all_threads.push_back(pt);
5118 : : }
5119 : : }
5120 : : }
5121 : :
5122 : : /* Trivial main loop! */
5123 [ + - + - ]: 76 : set_metric("ready", 1);
5124 [ + + ]: 218 : while (! interrupted)
5125 [ + - ]: 142 : pause ();
5126 [ + - ]: 76 : scanq.nuke(); // wake up any remaining scanq-related threads, let them die
5127 [ + + + - ]: 76 : if (scan_barrier) scan_barrier->nuke(); // ... in case they're stuck in a barrier
5128 [ + - + - ]: 76 : set_metric("ready", 0);
5129 : :
5130 [ + - ]: 76 : if (verbose)
5131 [ + - + - : 152 : obatched(clog) << "stopping" << endl;
- - ]
5132 : :
5133 : : /* Join all our threads. */
5134 [ + + ]: 490 : for (auto&& it : all_threads)
5135 [ + - ]: 414 : pthread_join (it, NULL);
5136 : :
5137 : : /* Stop all the web service threads. */
5138 [ + - + - ]: 76 : if (d46) MHD_stop_daemon (d46);
5139 [ - + - - ]: 76 : if (d4) MHD_stop_daemon (d4);
5140 : :
5141 [ + + ]: 76 : if (! passive_p)
5142 : : {
5143 : : /* With all threads known dead, we can clean up the global resources. */
5144 [ + - ]: 74 : rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_CLEANUP_DDL, NULL, NULL, NULL);
5145 [ - + ]: 74 : if (rc != SQLITE_OK)
5146 : : {
5147 [ # # # # ]: 0 : error (0, 0,
5148 : : "warning: cannot run database cleanup ddl: %s", sqlite3_errmsg(db));
5149 : : }
5150 : : }
5151 : :
5152 [ + - ]: 76 : debuginfod_pool_groom ();
5153 [ + + ]: 76 : delete scan_barrier;
5154 : :
5155 : : // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
5156 [ + - ]: 76 : (void) regfree (& file_include_regex);
5157 [ + - ]: 76 : (void) regfree (& file_exclude_regex);
5158 : :
5159 : 76 : sqlite3 *database = db;
5160 : 76 : sqlite3 *databaseq = dbq;
5161 : 76 : db = dbq = 0; // for signal_handler not to freak
5162 [ + - ]: 76 : (void) sqlite3_close (databaseq);
5163 [ + + ]: 76 : if (! passive_p)
5164 [ + - ]: 74 : (void) sqlite3_close (database);
5165 : :
5166 [ + + ]: 76 : return 0;
5167 : 76 : }
|