Branch data Line data Source code
1 : : /* Debuginfo-over-http server.
2 : : Copyright (C) 2019-2021 Red Hat, Inc.
3 : : Copyright (C) 2021, 2022 Mark J. Wielaard <mark@klomp.org>
4 : : This file is part of elfutils.
5 : :
6 : : This file is free software; you can redistribute it and/or modify
7 : : it under the terms of the GNU General Public License as published by
8 : : the Free Software Foundation; either version 3 of the License, or
9 : : (at your option) any later version.
10 : :
11 : : elfutils is distributed in the hope that it will be useful, but
12 : : WITHOUT ANY WARRANTY; without even the implied warranty of
13 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : : GNU General Public License for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 : :
19 : :
20 : : /* cargo-cult from libdwfl linux-kernel-modules.c */
21 : : /* In case we have a bad fts we include this before config.h because it
22 : : can't handle _FILE_OFFSET_BITS.
23 : : Everything we need here is fine if its declarations just come first.
24 : : Also, include sys/types.h before fts. On some systems fts.h is not self
25 : : contained. */
26 : : #ifdef BAD_FTS
27 : : #include <sys/types.h>
28 : : #include <fts.h>
29 : : #endif
30 : :
31 : : #ifdef HAVE_CONFIG_H
32 : : #include "config.h"
33 : : #endif
34 : :
35 : : // #define _GNU_SOURCE
36 : : #ifdef HAVE_SCHED_H
37 : : extern "C" {
38 : : #include <sched.h>
39 : : }
40 : : #endif
41 : : #ifdef HAVE_SYS_RESOURCE_H
42 : : extern "C" {
43 : : #include <sys/resource.h>
44 : : }
45 : : #endif
46 : :
47 : : extern "C" {
48 : : #include "printversion.h"
49 : : #include "system.h"
50 : : }
51 : :
52 : : #include "debuginfod.h"
53 : : #include <dwarf.h>
54 : :
55 : : #include <argp.h>
56 : : #ifdef __GNUC__
57 : : #undef __attribute__ /* glibc bug - rhbz 1763325 */
58 : : #endif
59 : :
60 : : #include <unistd.h>
61 : : #include <stdlib.h>
62 : : #include <locale.h>
63 : : #include <pthread.h>
64 : : #include <signal.h>
65 : : #include <sys/stat.h>
66 : : #include <sys/time.h>
67 : : #include <sys/vfs.h>
68 : : #include <unistd.h>
69 : : #include <fcntl.h>
70 : : #include <netdb.h>
71 : :
72 : :
73 : : /* If fts.h is included before config.h, its indirect inclusions may not
74 : : give us the right LFS aliases of these functions, so map them manually. */
75 : : #ifdef BAD_FTS
76 : : #ifdef _FILE_OFFSET_BITS
77 : : #define open open64
78 : : #define fopen fopen64
79 : : #endif
80 : : #else
81 : : #include <sys/types.h>
82 : : #include <fts.h>
83 : : #endif
84 : :
85 : : #include <cstring>
86 : : #include <vector>
87 : : #include <set>
88 : : #include <map>
89 : : #include <string>
90 : : #include <iostream>
91 : : #include <iomanip>
92 : : #include <ostream>
93 : : #include <sstream>
94 : : #include <mutex>
95 : : #include <deque>
96 : : #include <condition_variable>
97 : : #include <thread>
98 : : // #include <regex> // on rhel7 gcc 4.8, not competent
99 : : #include <regex.h>
100 : : // #include <algorithm>
101 : : using namespace std;
102 : :
103 : : #include <gelf.h>
104 : : #include <libdwelf.h>
105 : :
106 : : #include <microhttpd.h>
107 : :
108 : : #if MHD_VERSION >= 0x00097002
109 : : // libmicrohttpd 0.9.71 broke API
110 : : #define MHD_RESULT enum MHD_Result
111 : : #else
112 : : #define MHD_RESULT int
113 : : #endif
114 : :
115 : : #include <curl/curl.h>
116 : : #include <archive.h>
117 : : #include <archive_entry.h>
118 : : #include <sqlite3.h>
119 : :
120 : : #ifdef __linux__
121 : : #include <sys/syscall.h>
122 : : #endif
123 : :
124 : : #ifdef __linux__
125 : : #define tid() syscall(SYS_gettid)
126 : : #else
127 : : #define tid() pthread_self()
128 : : #endif
129 : :
130 : :
// Report whether HAYSTACK ends with NEEDLE.  An empty NEEDLE matches any
// HAYSTACK; a NEEDLE longer than HAYSTACK never matches.
inline bool
string_endswith(const std::string& haystack, const std::string& needle)
{
  const std::string::size_type hay_len = haystack.size();
  const std::string::size_type tail_len = needle.size();

  if (tail_len > hay_len)
    return false;

  // Compare only the last tail_len characters of haystack against needle.
  return haystack.compare(hay_len - tail_len, tail_len, needle) == 0;
}
138 : :
139 : :
140 : : // Roll this identifier for every sqlite schema incompatibility.
141 : : #define BUILDIDS "buildids9"
142 : :
143 : : #if SQLITE_VERSION_NUMBER >= 3008000
144 : : #define WITHOUT_ROWID "without rowid"
145 : : #else
146 : : #define WITHOUT_ROWID ""
147 : : #endif
148 : :
// SQL text for the index database, built by adjacent-literal concatenation:
//   1. connection pragmas,
//   2. "create ... if not exists" statements for the BUILDIDS-prefixed
//      interning tables, per-sourcetype tables, indexes and query views,
//   3. "drop ... if exists" statements for objects of earlier schema
//      generations (buildids8 back to the original buildids).
// BUILDIDS is pasted into every current object name, so changing that macro
// produces a fresh set of tables while step 3 discards the old ones.
// In the views below, sourcetype 'F' rows come from the _f_* tables and
// sourcetype 'R' rows from the _r_* tables.
149 : : static const char DEBUGINFOD_SQLITE_DDL[] =
150 : : "pragma foreign_keys = on;\n"
151 : : "pragma synchronous = 0;\n" // disable fsync()s - this cache is disposable across a machine crash
152 : : "pragma journal_mode = wal;\n" // https://sqlite.org/wal.html
153 : : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
154 : : "pragma journal_size_limit = 0;\n" // limit steady state file (between grooming, which also =truncate's)
155 : : "pragma auto_vacuum = incremental;\n" // https://sqlite.org/pragma.html
156 : : "pragma busy_timeout = 1000;\n" // https://sqlite.org/pragma.html
157 : : // NB: all these are overridable with -D option
158 : :
159 : : // Normalization table for interning file names
160 : : "create table if not exists " BUILDIDS "_files (\n"
161 : : " id integer primary key not null,\n"
162 : : " name text unique not null\n"
163 : : " );\n"
164 : : // Normalization table for interning buildids
165 : : "create table if not exists " BUILDIDS "_buildids (\n"
166 : : " id integer primary key not null,\n"
167 : : " hex text unique not null);\n"
168 : : // Track the completion of scanning of a given file & sourcetype at given time
169 : : "create table if not exists " BUILDIDS "_file_mtime_scanned (\n"
170 : : " mtime integer not null,\n"
171 : : " file integer not null,\n"
172 : : " size integer not null,\n" // in bytes
173 : : " sourcetype text(1) not null\n"
174 : : " check (sourcetype IN ('F', 'R')),\n"
175 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
176 : : " primary key (file, mtime, sourcetype)\n"
177 : : " ) " WITHOUT_ROWID ";\n"
// Debuginfo / executable flags per (buildid, file, mtime); feeds the 'F' rows
// of the _query_d and _query_e views ("file d/e" in the _stats view)
178 : : "create table if not exists " BUILDIDS "_f_de (\n"
179 : : " buildid integer not null,\n"
180 : : " debuginfo_p integer not null,\n"
181 : : " executable_p integer not null,\n"
182 : : " file integer not null,\n"
183 : : " mtime integer not null,\n"
184 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
185 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
186 : : " primary key (buildid, file, mtime)\n"
187 : : " ) " WITHOUT_ROWID ";\n"
188 : : // Index for faster delete by file identifier
189 : : "create index if not exists " BUILDIDS "_f_de_idx on " BUILDIDS "_f_de (file, mtime);\n"
// Source artifacts (artifactsrc) per buildid; feeds the 'F' rows of the
// _query_s view ("file s" in the _stats view)
190 : : "create table if not exists " BUILDIDS "_f_s (\n"
191 : : " buildid integer not null,\n"
192 : : " artifactsrc integer not null,\n"
193 : : " file integer not null,\n" // NB: not necessarily entered into _mtime_scanned
194 : : " mtime integer not null,\n"
195 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
196 : : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
197 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
198 : : " primary key (buildid, artifactsrc, file, mtime)\n"
199 : : " ) " WITHOUT_ROWID ";\n"
// Debuginfo / executable flags for archive members: "file" is exposed as
// source0 and "content" as source1 in the 'R' rows of _query_d / _query_e
200 : : "create table if not exists " BUILDIDS "_r_de (\n"
201 : : " buildid integer not null,\n"
202 : : " debuginfo_p integer not null,\n"
203 : : " executable_p integer not null,\n"
204 : : " file integer not null,\n"
205 : : " mtime integer not null,\n"
206 : : " content integer not null,\n"
207 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
208 : : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
209 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
210 : : " primary key (buildid, debuginfo_p, executable_p, file, content, mtime)\n"
211 : : " ) " WITHOUT_ROWID ";\n"
212 : : // Index for faster delete by archive file identifier
213 : : "create index if not exists " BUILDIDS "_r_de_idx on " BUILDIDS "_r_de (file, mtime);\n"
214 : : "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
215 : : " buildid integer not null,\n"
216 : : " artifactsrc integer not null,\n"
217 : : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
218 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
219 : : " primary key (buildid, artifactsrc)\n"
220 : : " ) " WITHOUT_ROWID ";\n"
221 : : "create table if not exists " BUILDIDS "_r_sdef (\n" // rpm contents that may satisfy sref
222 : : " file integer not null,\n"
223 : : " mtime integer not null,\n"
224 : : " content integer not null,\n"
225 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
226 : : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
227 : : " primary key (content, file, mtime)\n"
228 : : " ) " WITHOUT_ROWID ";\n"
229 : : // create views to glue together some of the above tables, for webapi D queries
230 : : "create view if not exists " BUILDIDS "_query_d as \n"
231 : : "select\n"
232 : : " b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
233 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
234 : : " where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n"
235 : : "union all select\n"
236 : : " b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
237 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
238 : : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n"
239 : : ";"
240 : : // ... and for E queries
241 : : "create view if not exists " BUILDIDS "_query_e as \n"
242 : : "select\n"
243 : : " b.hex as buildid, n.mtime, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1\n"
244 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_f_de n\n"
245 : : " where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n"
246 : : "union all select\n"
247 : : " b.hex as buildid, n.mtime, 'R' as sourcetype, f0.name as source0, n.mtime as mtime, f1.name as source1\n"
248 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_r_de n\n"
249 : : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n"
250 : : ";"
251 : : // ... and for S queries
252 : : "create view if not exists " BUILDIDS "_query_s as \n"
253 : : "select\n"
254 : : " b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n"
255 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files fs, " BUILDIDS "_f_s n\n"
256 : : " where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n"
257 : : "union all select\n"
258 : : " b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n"
259 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files f0, " BUILDIDS "_files f1, " BUILDIDS "_files fsref, "
260 : : " " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n"
261 : : " where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n"
262 : : " and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n"
263 : : ";"
264 : : // and for startup overview counts
// (dropped first so that the definition below always replaces an older one)
265 : : "drop view if exists " BUILDIDS "_stats;\n"
266 : : "create view if not exists " BUILDIDS "_stats as\n"
267 : : " select 'file d/e' as label,count(*) as quantity from " BUILDIDS "_f_de\n"
268 : : "union all select 'file s',count(*) from " BUILDIDS "_f_s\n"
269 : : "union all select 'archive d/e',count(*) from " BUILDIDS "_r_de\n"
270 : : "union all select 'archive sref',count(*) from " BUILDIDS "_r_sref\n"
271 : : "union all select 'archive sdef',count(*) from " BUILDIDS "_r_sdef\n"
272 : : "union all select 'buildids',count(*) from " BUILDIDS "_buildids\n"
273 : : "union all select 'filenames',count(*) from " BUILDIDS "_files\n"
274 : : "union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n"
275 : : "union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n"
276 : : #if SQLITE_VERSION_NUMBER >= 3016000
277 : : "union all select 'index db size (mb)',page_count*page_size/1024/1024 as size FROM pragma_page_count(), pragma_page_size()\n"
278 : : #endif
279 : : ";\n"
280 : :
281 : : // schema change history & garbage collection
282 : : //
283 : : // XXX: we could have migration queries here to bring prior-schema
284 : : // data over instead of just dropping it.
285 : : //
286 : : // buildids9: widen the mtime_scanned table
287 : : "" // <<< we are here
288 : : // buildids8: slim the sref table
289 : : "drop table if exists buildids8_f_de;\n"
290 : : "drop table if exists buildids8_f_s;\n"
291 : : "drop table if exists buildids8_r_de;\n"
292 : : "drop table if exists buildids8_r_sref;\n"
293 : : "drop table if exists buildids8_r_sdef;\n"
294 : : "drop table if exists buildids8_file_mtime_scanned;\n"
295 : : "drop table if exists buildids8_files;\n"
296 : : "drop table if exists buildids8_buildids;\n"
297 : : // buildids7: separate _norm table into dense subtype tables
298 : : "drop table if exists buildids7_f_de;\n"
299 : : "drop table if exists buildids7_f_s;\n"
300 : : "drop table if exists buildids7_r_de;\n"
301 : : "drop table if exists buildids7_r_sref;\n"
302 : : "drop table if exists buildids7_r_sdef;\n"
303 : : "drop table if exists buildids7_file_mtime_scanned;\n"
304 : : "drop table if exists buildids7_files;\n"
305 : : "drop table if exists buildids7_buildids;\n"
306 : : // buildids6: drop bolo/rfolo again, represent sources / rpmcontents in main table
307 : : "drop table if exists buildids6_norm;\n"
308 : : "drop table if exists buildids6_files;\n"
309 : : "drop table if exists buildids6_buildids;\n"
310 : : "drop view if exists buildids6;\n"
311 : : // buildids5: redefine srcfile1 column to be '.'-less (for rpms)
312 : : "drop table if exists buildids5_norm;\n"
313 : : "drop table if exists buildids5_files;\n"
314 : : "drop table if exists buildids5_buildids;\n"
315 : : "drop table if exists buildids5_bolo;\n"
316 : : "drop table if exists buildids5_rfolo;\n"
317 : : "drop view if exists buildids5;\n"
318 : : // buildids4: introduce rpmfile RFOLO
319 : : "drop table if exists buildids4_norm;\n"
320 : : "drop table if exists buildids4_files;\n"
321 : : "drop table if exists buildids4_buildids;\n"
322 : : "drop table if exists buildids4_bolo;\n"
323 : : "drop table if exists buildids4_rfolo;\n"
324 : : "drop view if exists buildids4;\n"
325 : : // buildids3*: split out srcfile BOLO
326 : : "drop table if exists buildids3_norm;\n"
327 : : "drop table if exists buildids3_files;\n"
328 : : "drop table if exists buildids3_buildids;\n"
329 : : "drop table if exists buildids3_bolo;\n"
330 : : "drop view if exists buildids3;\n"
331 : : // buildids2: normalized buildid and filenames into interning tables;
332 : : "drop table if exists buildids2_norm;\n"
333 : : "drop table if exists buildids2_files;\n"
334 : : "drop table if exists buildids2_buildids;\n"
335 : : "drop view if exists buildids2;\n"
336 : : // buildids1: made buildid and artifacttype NULLable, to represent cached-negative
337 : : // lookups from sources, e.g. files or rpms that contain no buildid-indexable content
338 : : "drop table if exists buildids1;\n"
339 : : // buildids: original
340 : : "drop table if exists buildids;\n"
341 : : ;
342 : :
// SQL text holding a single "wal_checkpoint = truncate" pragma.
// NOTE(review): the name suggests it is run at shutdown/cleanup time; the
// call site is outside this part of the file - confirm there.
343 : : static const char DEBUGINFOD_SQLITE_CLEANUP_DDL[] =
344 : : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
345 : : ;
346 : :
347 : :
348 : :
349 : :
350 : : /* Name and version of program. */
351 : : ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
352 : :
353 : : /* Bug report address. */
354 : : ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
355 : :
356 : : /* Definitions of arguments for argp functions. */
// Command-line option table handed to argp through the 'argp' struct.
// Each entry is a struct argp_option: { long name, key, argument name,
// flags, help text, group }.  Entries with a NULL name and a help text are
// group headers; the all-NULL entry terminates the table.
// Single-character keys double as the short option letter; the
// ARGP_KEY_* keys (0x1001 and up) are defined inline next to their entry
// and are matched by the same-named cases in parse_opt().
357 : : static const struct argp_option options[] =
358 : : {
359 : : { NULL, 0, NULL, 0, "Scanners:", 1 },
360 : : { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning.", 0 },
361 : : { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning.", 0 },
362 : : { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning.", 0 },
363 : : { "scan-archive", 'Z', "EXT=CMD", 0, "Enable arbitrary archive scanning.", 0 },
364 : : // "source-oci-imageregistry" ...
365 : :
366 : : { NULL, 0, NULL, 0, "Options:", 2 },
367 : : { "logical", 'L', NULL, 0, "Follow symlinks, default=ignore.", 0 },
368 : : { "rescan-time", 't', "SECONDS", 0, "Number of seconds to wait between rescans, 0=disable.", 0 },
369 : : { "groom-time", 'g', "SECONDS", 0, "Number of seconds to wait between database grooming, 0=disable.", 0 },
370 : : { "maxigroom", 'G', NULL, 0, "Run a complete database groom/shrink pass at startup.", 0 },
371 : : { "concurrency", 'c', "NUM", 0, "Limit scanning thread concurrency to NUM, default=#CPUs.", 0 },
// -C is the only option whose argument is optional; parse_opt() therefore
// has to cope with a NULL arg for it.
372 : : { "connection-pool", 'C', "NUM", OPTION_ARG_OPTIONAL,
373 : : "Use webapi connection pool with NUM threads, default=unlim.", 0 },
374 : : { "include", 'I', "REGEX", 0, "Include files matching REGEX, default=all.", 0 },
375 : : { "exclude", 'X', "REGEX", 0, "Exclude files matching REGEX, default=none.", 0 },
376 : : { "port", 'p', "NUM", 0, "HTTP port to listen on, default 8002.", 0 },
377 : : { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
378 : : { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
379 : : { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
380 : : { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X arguments to groom the database.",0},
381 : : #define ARGP_KEY_FDCACHE_FDS 0x1001
382 : : { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
383 : : #define ARGP_KEY_FDCACHE_MBS 0x1002
384 : : { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
385 : : #define ARGP_KEY_FDCACHE_PREFETCH 0x1003
386 : : { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
387 : : #define ARGP_KEY_FDCACHE_MINTMP 0x1004
388 : : { "fdcache-mintmp", ARGP_KEY_FDCACHE_MINTMP, "NUM", 0, "Minimum free space% on tmpdir.", 0 },
389 : : #define ARGP_KEY_FDCACHE_PREFETCH_MBS 0x1005
390 : : { "fdcache-prefetch-mbs", ARGP_KEY_FDCACHE_PREFETCH_MBS, "MB", 0,"Megabytes allocated to the \
391 : : prefetch cache.", 0},
392 : : #define ARGP_KEY_FDCACHE_PREFETCH_FDS 0x1006
393 : : { "fdcache-prefetch-fds", ARGP_KEY_FDCACHE_PREFETCH_FDS, "NUM", 0,"Number of files allocated to the \
394 : : prefetch cache.", 0},
395 : : #define ARGP_KEY_FORWARDED_TTL_LIMIT 0x1007
396 : : {"forwarded-ttl-limit", ARGP_KEY_FORWARDED_TTL_LIMIT, "NUM", 0, "Limit of X-Forwarded-For hops, default 8.", 0},
397 : : #define ARGP_KEY_PASSIVE 0x1008
398 : : { "passive", ARGP_KEY_PASSIVE, NULL, 0, "Do not scan or groom, read-only database.", 0 },
399 : : #define ARGP_KEY_DISABLE_SOURCE_SCAN 0x1009
400 : : { "disable-source-scan", ARGP_KEY_DISABLE_SOURCE_SCAN, NULL, 0, "Do not scan dwarf source info.", 0 },
401 : : { NULL, 0, NULL, 0, NULL, 0 },
402 : : };
403 : :
404 : : /* Short description of program. */
405 : : static const char doc[] = "Serve debuginfo-related content across HTTP from files under PATHs.";
406 : :
407 : : /* Strings for arguments in help texts. */
408 : : static const char args_doc[] = "[PATH ...]";
409 : :
410 : : /* Prototype for option handler. */
411 : : static error_t parse_opt (int key, char *arg, struct argp_state *state);
412 : :
// Supplies the initial value of the 'concurrency' global below; its
// definition is not in this part of the file.
413 : : static unsigned default_concurrency();
414 : :
415 : : /* Data structure to communicate with argp functions. */
// Initializers in struct argp order: option table, parser callback,
// argument synopsis, program description; the three trailing NULLs leave
// the remaining members (children, help filter, translation domain) unset.
416 : : static struct argp argp =
417 : : {
418 : : options, parse_opt, args_doc, doc, NULL, NULL, NULL
419 : : };
420 : :
421 : :
// File-level configuration and state.  Most of these are filled in by
// parse_opt(); the option that sets each one is noted where parse_opt()
// shows it.  Variables without an initializer start out zero/empty.
422 : : static string db_path; // -d FILE; ":memory:" is stored as a shared-cache URI
423 : : static sqlite3 *db; // single connection, serialized across all our threads!
424 : : static sqlite3 *dbq; // webapi query-servicing readonly connection, serialized ditto!
425 : : static unsigned verbose; // incremented once per -v
426 : : static volatile sig_atomic_t interrupted = 0;
427 : : static volatile sig_atomic_t forced_rescan_count = 0;
428 : : static volatile sig_atomic_t sigusr1 = 0;
429 : : static volatile sig_atomic_t forced_groom_count = 0;
430 : : static volatile sig_atomic_t sigusr2 = 0;
431 : : static unsigned http_port = 8002; // -p; parse_opt rejects 0 and values above 65535
432 : : static unsigned rescan_s = 300; // -t, seconds
433 : : static unsigned groom_s = 86400; // -g, seconds
434 : : static bool maxigroom = false; // -G
435 : : static unsigned concurrency = default_concurrency(); // -c
436 : : static int connection_pool = 0; // -C [NUM]; stays 0 when NUM is omitted
437 : : static set<string> source_paths; // positional PATH arguments
438 : : static bool scan_files = false; // -F
439 : : static map<string,string> scan_archives; // filename extension -> command string, from -R / -U / -Z
440 : : static vector<string> extra_ddl; // -D SQL, in command-line order
441 : : static regex_t file_include_regex; // -I, compiled with REG_EXTENDED|REG_NOSUB
442 : : static regex_t file_exclude_regex; // -X, compiled with REG_EXTENDED|REG_NOSUB
443 : : static bool regex_groom = false; // -r
444 : : static bool traverse_logical; // -L
445 : : static long fdcache_fds; // --fdcache-fds
446 : : static long fdcache_mbs; // --fdcache-mbs
447 : : static long fdcache_prefetch; // --fdcache-prefetch
448 : : static long fdcache_mintmp; // --fdcache-mintmp, percent; parse_opt enforces 0..100
449 : : static long fdcache_prefetch_mbs; // --fdcache-prefetch-mbs; parse_opt rejects negatives
450 : : static long fdcache_prefetch_fds; // --fdcache-prefetch-fds; parse_opt rejects negatives
451 : : static unsigned forwarded_ttl_limit = 8; // --forwarded-ttl-limit
452 : : static bool scan_source_info = true; // cleared by --disable-source-scan
453 : : static string tmpdir;
454 : : static bool passive_p = false; // --passive
455 : :
// Forward declarations of the metric helpers; their definitions are not in
// this part of the file.  They are declared here because the RAII wrappers
// and exception constructors below call them.  The overloads differ in how
// many label name/value pairs accompany the metric name (none, one
// "lname"/"lvalue" pair, or an additional "rname"/"rvalue" pair).
456 : : static void set_metric(const string& key, double value);
457 : : // static void inc_metric(const string& key);
458 : : static void set_metric(const string& metric,
459 : : const string& lname, const string& lvalue,
460 : : double value);
461 : : static void inc_metric(const string& metric,
462 : : const string& lname, const string& lvalue);
463 : : static void add_metric(const string& metric,
464 : : const string& lname, const string& lvalue,
465 : : double value);
466 : : static void inc_metric(const string& metric,
467 : : const string& lname, const string& lvalue,
468 : : const string& rname, const string& rvalue);
469 : : static void add_metric(const string& metric,
470 : : const string& lname, const string& lvalue,
471 : : const string& rname, const string& rvalue,
472 : : double value);
473 : :
474 : :
475 : : class tmp_inc_metric { // a RAII style wrapper for exception-safe scoped increment & decrement
476 : : string m, n, v;
477 : : public:
478 : 793 : tmp_inc_metric(const string& mname, const string& lname, const string& lvalue):
479 [ + - + - : 793 : m(mname), n(lname), v(lvalue)
- - - - -
- ]
480 : : {
481 [ + - ]: 793 : add_metric (m, n, v, 1);
482 : 793 : }
483 : 793 : ~tmp_inc_metric()
484 [ - + - + : 793 : {
- + ]
485 : 793 : add_metric (m, n, v, -1);
486 : 793 : }
487 : : };
488 : :
489 : : class tmp_ms_metric { // a RAII style wrapper for exception-safe scoped timing
490 : : string m, n, v;
491 : : struct timespec ts_start;
492 : : public:
493 : 17020 : tmp_ms_metric(const string& mname, const string& lname, const string& lvalue):
494 [ + - + - : 17020 : m(mname), n(lname), v(lvalue)
- - - - ]
495 : : {
496 : 17022 : clock_gettime (CLOCK_MONOTONIC, & ts_start);
497 : 17022 : }
498 : 17020 : ~tmp_ms_metric()
499 [ - + - + ]: 17023 : {
500 : 17020 : struct timespec ts_end;
501 : 17020 : clock_gettime (CLOCK_MONOTONIC, & ts_end);
502 : 17023 : double deltas = (ts_end.tv_sec - ts_start.tv_sec)
503 : 17023 : + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
504 : :
505 [ + - ]: 17023 : add_metric (m + "_milliseconds_sum", n, v, (deltas*1000.0));
506 [ + - + + ]: 34046 : inc_metric (m + "_milliseconds_count", n, v);
507 : 17023 : }
508 : : };
509 : :
510 : :
511 : : /* Handle program arguments. */
512 : : static error_t
513 : 561 : parse_opt (int key, char *arg,
514 : : struct argp_state *state __attribute__ ((unused)))
515 : : {
516 : 561 : int rc;
517 [ + + + + : 561 : switch (key)
+ + + + -
+ + - - +
+ + + + +
+ + + + +
+ + - + ]
518 : : {
519 : 139 : case 'v': verbose ++; break;
520 : 36 : case 'd':
521 : : /* When using the in-memory database make sure it is shareable,
522 : : so we can open it twice as read/write and read-only. */
523 [ + + ]: 36 : if (strcmp (arg, ":memory:") == 0)
524 : 561 : db_path = "file::memory:?cache=shared";
525 : : else
526 [ + - ]: 58 : db_path = string(arg);
527 : : break;
528 : 36 : case 'p': http_port = (unsigned) atoi(arg);
529 [ - + ]: 36 : if (http_port == 0 || http_port > 65535)
530 : 0 : argp_failure(state, 1, EINVAL, "port number");
531 : : break;
532 : 24 : case 'F': scan_files = true; break;
533 : 11 : case 'R':
534 [ + - + - : 11 : scan_archives[".rpm"]="cat"; // libarchive groks rpm natively
- + ]
535 : 11 : break;
536 : 7 : case 'U':
537 [ + - + - : 7 : scan_archives[".deb"]="(bsdtar -O -x -f - data.tar\\*)<";
- + ]
538 [ + - + - : 7 : scan_archives[".ddeb"]="(bsdtar -O -x -f - data.tar\\*)<";
- + ]
539 [ + - + - : 7 : scan_archives[".ipk"]="(bsdtar -O -x -f - data.tar\\*)<";
- + ]
540 : : // .udeb too?
541 : 7 : break;
542 : 19 : case 'Z':
543 : 19 : {
544 [ - + ]: 19 : char* extension = strchr(arg, '=');
545 [ - + ]: 19 : if (arg[0] == '\0')
546 : 0 : argp_failure(state, 1, EINVAL, "missing EXT");
547 [ + + ]: 19 : else if (extension)
548 [ + - + - : 20 : scan_archives[string(arg, (extension-arg))]=string(extension+1);
- + - + -
- ]
549 : : else
550 [ + - + - : 9 : scan_archives[string(arg)]=string("cat");
- + - + -
- ]
551 : : }
552 : : break;
553 : 4 : case 'L':
554 [ - + ]: 4 : if (passive_p)
555 : 0 : argp_failure(state, 1, EINVAL, "-L option inconsistent with passive mode");
556 : 4 : traverse_logical = true;
557 : 4 : break;
558 : 0 : case 'D':
559 [ # # ]: 0 : if (passive_p)
560 : 0 : argp_failure(state, 1, EINVAL, "-D option inconsistent with passive mode");
561 [ # # ]: 0 : extra_ddl.push_back(string(arg));
562 : 0 : break;
563 : 29 : case 't':
564 [ - + ]: 29 : if (passive_p)
565 : 0 : argp_failure(state, 1, EINVAL, "-t option inconsistent with passive mode");
566 : 29 : rescan_s = (unsigned) atoi(arg);
567 : 29 : break;
568 : 29 : case 'g':
569 [ - + ]: 29 : if (passive_p)
570 : 0 : argp_failure(state, 1, EINVAL, "-g option inconsistent with passive mode");
571 : 29 : groom_s = (unsigned) atoi(arg);
572 : 29 : break;
573 : 0 : case 'G':
574 [ # # ]: 0 : if (passive_p)
575 : 0 : argp_failure(state, 1, EINVAL, "-G option inconsistent with passive mode");
576 : 0 : maxigroom = true;
577 : 0 : break;
578 : 0 : case 'c':
579 [ # # ]: 0 : if (passive_p)
580 : 0 : argp_failure(state, 1, EINVAL, "-c option inconsistent with passive mode");
581 : 0 : concurrency = (unsigned) atoi(arg);
582 [ # # ]: 0 : if (concurrency < 1) concurrency = 1;
583 : : break;
584 : 3 : case 'C':
585 [ + + ]: 3 : if (arg)
586 : : {
587 : 2 : connection_pool = atoi(arg);
588 [ - + ]: 2 : if (connection_pool < 2)
589 : 0 : argp_failure(state, 1, EINVAL, "-C NUM minimum 2");
590 : : }
591 : : break;
592 : 2 : case 'I':
593 : : // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
594 [ - + ]: 2 : if (passive_p)
595 : 0 : argp_failure(state, 1, EINVAL, "-I option inconsistent with passive mode");
596 : 2 : regfree (&file_include_regex);
597 : 2 : rc = regcomp (&file_include_regex, arg, REG_EXTENDED|REG_NOSUB);
598 [ - + ]: 2 : if (rc != 0)
599 : 0 : argp_failure(state, 1, EINVAL, "regular expression");
600 : : break;
601 : 3 : case 'X':
602 [ - + ]: 3 : if (passive_p)
603 : 0 : argp_failure(state, 1, EINVAL, "-X option inconsistent with passive mode");
604 : 3 : regfree (&file_exclude_regex);
605 : 3 : rc = regcomp (&file_exclude_regex, arg, REG_EXTENDED|REG_NOSUB);
606 [ - + ]: 3 : if (rc != 0)
607 : 0 : argp_failure(state, 1, EINVAL, "regular expression");
608 : : break;
609 : 2 : case 'r':
610 [ - + ]: 2 : if (passive_p)
611 : 0 : argp_failure(state, 1, EINVAL, "-r option inconsistent with passive mode");
612 : 2 : regex_groom = true;
613 : 2 : break;
614 : 5 : case ARGP_KEY_FDCACHE_FDS:
615 : 5 : fdcache_fds = atol (arg);
616 : 5 : break;
617 : 2 : case ARGP_KEY_FDCACHE_MBS:
618 : 2 : fdcache_mbs = atol (arg);
619 : 2 : break;
620 : 1 : case ARGP_KEY_FDCACHE_PREFETCH:
621 : 1 : fdcache_prefetch = atol (arg);
622 : 1 : break;
623 : 1 : case ARGP_KEY_FDCACHE_MINTMP:
624 : 1 : fdcache_mintmp = atol (arg);
625 [ - + ]: 1 : if( fdcache_mintmp > 100 || fdcache_mintmp < 0 )
626 : 0 : argp_failure(state, 1, EINVAL, "fdcache mintmp percent");
627 : : break;
628 : 2 : case ARGP_KEY_FORWARDED_TTL_LIMIT:
629 : 2 : forwarded_ttl_limit = (unsigned) atoi(arg);
630 : 2 : break;
631 : 50 : case ARGP_KEY_ARG:
632 [ + - ]: 100 : source_paths.insert(string(arg));
633 : 50 : break;
634 : 5 : case ARGP_KEY_FDCACHE_PREFETCH_FDS:
635 : 5 : fdcache_prefetch_fds = atol(arg);
636 [ - + ]: 5 : if ( fdcache_prefetch_fds < 0)
637 : 0 : argp_failure(state, 1, EINVAL, "fdcache prefetch fds");
638 : : break;
639 : 1 : case ARGP_KEY_FDCACHE_PREFETCH_MBS:
640 : 1 : fdcache_prefetch_mbs = atol(arg);
641 [ - + ]: 1 : if ( fdcache_prefetch_mbs < 0)
642 : 0 : argp_failure(state, 1, EINVAL, "fdcache prefetch mbs");
643 : : break;
644 : 1 : case ARGP_KEY_PASSIVE:
645 : 1 : passive_p = true;
646 [ + - ]: 1 : if (source_paths.size() > 0
647 [ + - ]: 1 : || maxigroom
648 [ + - ]: 1 : || extra_ddl.size() > 0
649 [ + - - + ]: 2 : || traverse_logical)
650 : : // other conflicting options tricky to check
651 : 0 : argp_failure(state, 1, EINVAL, "inconsistent options with passive mode");
652 : : break;
653 : 0 : case ARGP_KEY_DISABLE_SOURCE_SCAN:
654 : 0 : scan_source_info = false;
655 : 0 : break;
656 : : // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
657 : : default: return ARGP_ERR_UNKNOWN;
658 : : }
659 : :
660 : : return 0;
661 : : }
662 : :
663 : :
664 : : ////////////////////////////////////////////////////////////////////////
665 : :
666 : :
// Forward declaration; the definition is not in this part of the file.
// Called by reportable_exception::mhd_send_response() below with a
// header name H and value V for response R.
667 : : static void add_mhd_response_header (struct MHD_Response *r,
668 : : const char *h, const char *v);
669 : :
670 : : // represent errors that may get reported to an ostream and/or a libmicrohttpd connection
671 : :
672 : 4 : struct reportable_exception
673 : : {
674 : : int code;
675 : : string message;
676 : :
677 [ - - + - : 43 : reportable_exception(int c, const string& m): code(c), message(m) {}
- - + - +
- ]
678 [ - - - - : 63 : reportable_exception(const string& m): code(503), message(m) {}
- - + - -
- + - - -
- - + - -
- - - - -
+ - - - ]
679 : : reportable_exception(): code(503), message() {}
680 : :
681 : : void report(ostream& o) const; // defined under obatched() class below
682 : :
683 : 75 : MHD_RESULT mhd_send_response(MHD_Connection* c) const {
684 : 150 : MHD_Response* r = MHD_create_response_from_buffer (message.size(),
685 : 75 : (void*) message.c_str(),
686 : : MHD_RESPMEM_MUST_COPY);
687 : 75 : add_mhd_response_header (r, "Content-Type", "text/plain");
688 : 75 : MHD_RESULT rc = MHD_queue_response (c, code, r);
689 : 75 : MHD_destroy_response (r);
690 : 75 : return rc;
691 : : }
692 : : };
693 : :
694 : :
695 : : struct sqlite_exception: public reportable_exception
696 : : {
697 : 0 : sqlite_exception(int rc, const string& msg):
698 [ # # # # : 0 : reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) {
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
699 [ # # # # : 0 : inc_metric("error_count","sqlite3",sqlite3_errstr(rc));
# # # # #
# # # # #
# # # # #
# ]
700 : 0 : }
701 : : };
702 : :
703 [ + - - - ]: 2 : struct libc_exception: public reportable_exception
704 : : {
705 : 57 : libc_exception(int rc, const string& msg):
706 [ - + + - : 228 : reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
+ - + - +
- + - - +
- + - + +
- - - - -
- - - - ]
707 [ + - + - : 171 : inc_metric("error_count","libc",strerror(rc));
+ - + - -
+ + - - -
- - - - ]
708 : 57 : }
709 : : };
710 : :
711 : :
712 : : struct archive_exception: public reportable_exception
713 : : {
714 : 0 : archive_exception(const string& msg):
715 [ # # # # : 0 : reportable_exception(string("libarchive error: ") + msg) {
# # # # #
# ]
716 [ # # # # : 0 : inc_metric("error_count","libarchive",msg);
# # # # #
# # # ]
717 : 0 : }
718 : 0 : archive_exception(struct archive* a, const string& msg):
719 [ # # # # : 0 : reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
720 [ # # # # : 0 : inc_metric("error_count","libarchive",msg + ": " + string(archive_error_string(a) ?: "?"));
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# # # #
# ]
721 : 0 : }
722 : : };
723 : :
724 : :
725 : : struct elfutils_exception: public reportable_exception
726 : : {
727 : 0 : elfutils_exception(int rc, const string& msg):
728 [ # # # # : 0 : reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
729 [ # # # # : 0 : inc_metric("error_count","elfutils",elf_errmsg(rc));
# # # # #
# # # # #
# # # # #
# ]
730 : 0 : }
731 : : };
732 : :
733 : :
734 : : ////////////////////////////////////////////////////////////////////////
735 : :
736 : : template <typename Payload>
737 : : class workq
738 : : {
739 : : set<Payload> q; // eliminate duplicates
740 : : mutex mtx;
741 : : condition_variable cv;
742 : : bool dead;
743 : : unsigned idlers; // number of threads busy with wait_idle / done_idle
744 : : unsigned fronters; // number of threads busy with wait_front / done_front
745 : :
746 : : public:
747 : 36 : workq() { dead = false; idlers = 0; fronters = 0; }
748 : 36 : ~workq() {}
749 : :
750 : 470 : void push_back(const Payload& p)
751 : : {
752 : 470 : unique_lock<mutex> lock(mtx);
753 [ + - ]: 470 : q.insert (p);
754 [ + - + - : 940 : set_metric("thread_work_pending","role","scan", q.size());
+ - + - -
+ - + - -
- - - - ]
755 [ + - ]: 470 : cv.notify_all();
756 : 470 : }
757 : :
758 : : // kill this workqueue, wake up all idlers / scanners
759 : 36 : void nuke() {
760 : 36 : unique_lock<mutex> lock(mtx);
761 : : // optional: q.clear();
762 : 36 : dead = true;
763 [ + - ]: 36 : cv.notify_all();
764 : 36 : }
765 : :
766 : : // clear the workqueue, when scanning is interrupted with USR2
767 : 0 : void clear() {
768 : 0 : unique_lock<mutex> lock(mtx);
769 : 0 : q.clear();
770 [ # # # # : 0 : set_metric("thread_work_pending","role","scan", q.size());
# # # # #
# # # # #
# # # # ]
771 : : // NB: there may still be some live fronters
772 [ # # ]: 0 : cv.notify_all(); // maybe wake up waiting idlers
773 : 0 : }
774 : :
775 : : // block this scanner thread until there is work to do and no active idler
776 : 598 : bool wait_front (Payload& p)
777 : : {
778 : 1196 : unique_lock<mutex> lock(mtx);
779 [ + + + + : 2892 : while (!dead && (q.size() == 0 || idlers > 0))
+ + ]
780 : 2294 : cv.wait(lock);
781 [ + + ]: 598 : if (dead)
782 : : return false;
783 : : else
784 : : {
785 [ + - ]: 470 : p = * q.begin();
786 : 470 : q.erase (q.begin());
787 : 470 : fronters ++; // prevent idlers from starting awhile, even if empty q
788 [ + - + - : 940 : set_metric("thread_work_pending","role","scan", q.size());
+ - + - -
+ - + - -
- - - - ]
789 : : // NB: don't wake up idlers yet! The consumer is busy
790 : : // processing this element until it calls done_front().
791 : 470 : return true;
792 : : }
793 : : }
794 : :
795 : : // notify waitq that scanner thread is done with that last item
796 : 470 : void done_front ()
797 : : {
798 : 470 : unique_lock<mutex> lock(mtx);
799 : 470 : fronters --;
800 [ + + + + ]: 470 : if (q.size() == 0 && fronters == 0)
801 : 82 : cv.notify_all(); // maybe wake up waiting idlers
802 : 470 : }
803 : :
804 : : // block this idler thread until there is no work to do
805 : 310 : void wait_idle ()
806 : : {
807 : 310 : unique_lock<mutex> lock(mtx);
808 : 310 : cv.notify_all(); // maybe wake up waiting scanners
809 [ + + + + : 325 : while (!dead && ((q.size() != 0) || fronters > 0))
+ + ]
810 : 15 : cv.wait(lock);
811 [ + - ]: 310 : idlers ++;
812 : 310 : }
813 : :
814 : 275 : void done_idle ()
815 : : {
816 : 275 : unique_lock<mutex> lock(mtx);
817 : 275 : idlers --;
818 [ + - ]: 275 : cv.notify_all(); // maybe wake up waiting scanners, but probably not (shutting down)
819 : 275 : }
820 : : };
821 : :
822 : : typedef struct stat stat_t;
823 : : typedef pair<string,stat_t> scan_payload;
824 : 1329 : inline bool operator< (const scan_payload& a, const scan_payload& b)
825 : : {
826 [ + + + + : 1329 : return a.first < b.first; // don't bother compare the stat fields
+ - ]
827 : : }
828 : : static workq<scan_payload> scanq; // just a single one
829 : : // producer & idler: thread_main_fts_source_paths()
830 : : // consumer: thread_main_scanner()
831 : : // idler: thread_main_groom()
832 : :
833 : :
834 : : ////////////////////////////////////////////////////////////////////////
835 : :
// A thread-safe set that lends 'ownership' of a value to one thread
// at a time.  acquire() blocks while another thread holds an equal
// value; release() gives it back and wakes the waiters.  It behaves
// like a semaphore created on demand for each distinct value.
template <typename T>
class unique_set
{
private:
  std::set<T> values;          // values currently held by some thread
  std::mutex mtx;
  std::condition_variable cv;
public:
  unique_set() {}
  ~unique_set() {}

  // Block until VALUE is not held by anyone, then mark it as held.
  void acquire(const T& value)
  {
    std::unique_lock<std::mutex> guard(mtx);
    cv.wait(guard, [&] { return values.count(value) == 0; });
    values.insert(value);
  }

  // Give VALUE back and wake every thread waiting in acquire().
  void release(const T& value)
  {
    std::lock_guard<std::mutex> guard(mtx);
    // assert (values.find(value) != values.end());
    values.erase(value);
    cv.notify_all();
  }
};
866 : :
867 : :
868 : : // This is the object that's instantiate to uniquely hold a value in a
869 : : // RAII-pattern way.
870 : : template <typename T>
871 : : class unique_set_reserver
872 : : {
873 : : private:
874 : : unique_set<T>& please_hold;
875 : : T mine;
876 : : public:
877 : 452 : unique_set_reserver(unique_set<T>& t, const T& value):
878 [ + - - - ]: 452 : please_hold(t), mine(value) { please_hold.acquire(mine); }
879 [ + - ]: 452 : ~unique_set_reserver() { please_hold.release(mine); }
880 : : };
881 : :
882 : :
883 : : ////////////////////////////////////////////////////////////////////////
884 : :
885 : :
886 : : // Print a standard timestamp.
887 : : static ostream&
888 : 9263 : timestamp (ostream &o)
889 : : {
890 : 9263 : char datebuf[80];
891 : 9263 : char *now2 = NULL;
892 : 9263 : time_t now_t = time(NULL);
893 : 9264 : struct tm now;
894 : 9264 : struct tm *nowp = gmtime_r (&now_t, &now);
895 [ + - ]: 9265 : if (nowp)
896 : : {
897 : 9265 : (void) strftime (datebuf, sizeof (datebuf), "%c", nowp);
898 : 9265 : now2 = datebuf;
899 : : }
900 : :
901 : 9265 : return o << "[" << (now2 ? now2 : "") << "] "
902 [ - + ]: 9265 : << "(" << getpid () << "/" << tid() << "): ";
903 : : }
904 : :
905 : :
906 : : // A little class that impersonates an ostream to the extent that it can
907 : : // take << streaming operations. It batches up the bits into an internal
908 : : // stringstream until it is destroyed; then flushes to the original ostream.
909 : : // It adds a timestamp
910 : : class obatched
911 : : {
912 : : private:
913 : : ostream& o;
914 : : stringstream stro;
915 : : static mutex lock;
916 : : public:
917 : 9266 : obatched(ostream& oo, bool timestamp_p = true): o(oo)
918 : : {
919 [ + - ]: 9263 : if (timestamp_p)
920 [ + - ]: 9263 : timestamp(stro);
921 : 9266 : }
922 : 9266 : ~obatched()
923 : 18532 : {
924 : 18532 : unique_lock<mutex> do_not_cross_the_streams(obatched::lock);
925 [ + - ]: 9266 : o << stro.str();
926 [ + - ]: 9266 : o.flush();
927 : 9266 : }
928 : : operator ostream& () { return stro; }
929 [ - - + - : 7732 : template <typename T> ostream& operator << (const T& t) { stro << t; return stro; }
+ - + - +
- + - + -
- - - - -
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - + -
+ - + - -
- + - + -
+ - + - +
- + - + -
+ - - - +
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - - -
- - + - -
- + - - -
- - - - +
- - - + -
- - + - -
- - - + -
+ - + - +
- + - ]
930 : : };
931 : : mutex obatched::lock; // just the one, since cout/cerr iostreams are not thread-safe
932 : :
933 : :
934 : 106 : void reportable_exception::report(ostream& o) const {
935 [ + - + - ]: 106 : obatched(o) << message << endl;
936 : 106 : }
937 : :
938 : :
939 : : ////////////////////////////////////////////////////////////////////////
940 : :
941 : :
942 : : // RAII style sqlite prepared-statement holder that matches { } block lifetime
943 : :
944 : : struct sqlite_ps
945 : : {
946 : : private:
947 : : sqlite3* db;
948 : : const string nickname;
949 : : const string sql;
950 : : sqlite3_stmt *pp;
951 : :
952 : : sqlite_ps(const sqlite_ps&); // make uncopyable
953 : : sqlite_ps& operator=(const sqlite_ps &); // make unassignable
954 : :
955 : : public:
956 [ + - - - ]: 2448 : sqlite_ps (sqlite3* d, const string& n, const string& s): db(d), nickname(n), sql(s) {
957 : : // tmp_ms_metric tick("sqlite3","prep",nickname);
958 [ + + ]: 2448 : if (verbose > 4)
959 [ + - + - : 142 : obatched(clog) << nickname << " prep " << sql << endl;
+ - + - +
- - - ]
960 [ + - ]: 2448 : int rc = sqlite3_prepare_v2 (db, sql.c_str(), -1 /* to \0 */, & this->pp, NULL);
961 [ - + ]: 2448 : if (rc != SQLITE_OK)
962 [ # # # # ]: 0 : throw sqlite_exception(rc, "prepare " + sql);
963 : 2448 : }
964 : :
965 : 8688 : sqlite_ps& reset()
966 : : {
967 [ + - + - : 17377 : tmp_ms_metric tick("sqlite3","reset",nickname);
- + + - -
- ]
968 [ + - ]: 8689 : sqlite3_reset(this->pp);
969 : 8690 : return *this;
970 : : }
971 : :
972 : 11411 : sqlite_ps& bind(int parameter, const string& str)
973 : : {
974 [ + + ]: 11411 : if (verbose > 4)
975 [ + - + - : 338 : obatched(clog) << nickname << " bind " << parameter << "=" << str << endl;
+ - + - +
- + - ]
976 : 11411 : int rc = sqlite3_bind_text (this->pp, parameter, str.c_str(), -1, SQLITE_TRANSIENT);
977 [ - + ]: 11413 : if (rc != SQLITE_OK)
978 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 bind");
979 : 11413 : return *this;
980 : : }
981 : :
982 : 3749 : sqlite_ps& bind(int parameter, int64_t value)
983 : : {
984 [ + + ]: 3749 : if (verbose > 4)
985 [ + - + - : 159 : obatched(clog) << nickname << " bind " << parameter << "=" << value << endl;
+ - + - +
- + - ]
986 : 3749 : int rc = sqlite3_bind_int64 (this->pp, parameter, value);
987 [ - + ]: 3750 : if (rc != SQLITE_OK)
988 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 bind");
989 : 3750 : return *this;
990 : : }
991 : :
992 : : sqlite_ps& bind(int parameter)
993 : : {
994 : : if (verbose > 4)
995 : : obatched(clog) << nickname << " bind " << parameter << "=" << "NULL" << endl;
996 : : int rc = sqlite3_bind_null (this->pp, parameter);
997 : : if (rc != SQLITE_OK)
998 : : throw sqlite_exception(rc, "sqlite3 bind");
999 : : return *this;
1000 : : }
1001 : :
1002 : :
1003 : 6651 : void step_ok_done() {
1004 [ + - + - : 19953 : tmp_ms_metric tick("sqlite3","step_done",nickname);
- + + - -
- ]
1005 [ + - ]: 6651 : int rc = sqlite3_step (this->pp);
1006 [ + + ]: 6651 : if (verbose > 4)
1007 [ + - + - : 192 : obatched(clog) << nickname << " step-ok-done(" << sqlite3_errstr(rc) << ") " << sql << endl;
+ - + - +
- + - + -
+ - ]
1008 [ + + - + ]: 6651 : if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
1009 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 step");
1010 [ + - ]: 6651 : (void) sqlite3_reset (this->pp);
1011 : 6651 : }
1012 : :
1013 : :
1014 : 1682 : int step() {
1015 [ + - + - : 3364 : tmp_ms_metric tick("sqlite3","step",nickname);
- + + - -
- ]
1016 [ + - ]: 1682 : int rc = sqlite3_step (this->pp);
1017 [ + + ]: 1682 : if (verbose > 4)
1018 [ + - + - : 85 : obatched(clog) << nickname << " step(" << sqlite3_errstr(rc) << ") " << sql << endl;
+ - + - +
- + - + -
+ - ]
1019 : 1682 : return rc;
1020 : : }
1021 : :
1022 [ + + + + ]: 4858 : ~sqlite_ps () { sqlite3_finalize (this->pp); }
1023 [ + - + - : 1290 : operator sqlite3_stmt* () { return this->pp; }
+ - ]
1024 : : };
1025 : :
1026 : :
1027 : : ////////////////////////////////////////////////////////////////////////
1028 : :
// RAII style templated autocloser: remembers a payload together with
// a cleanup function and calls that function on the payload when the
// object goes out of scope.  The cleanup function's result (of type
// Ignore) is discarded.

template <class Payload, class Ignore>
struct defer_dtor
{
public:
  typedef Ignore (*dtor_fn) (Payload);

  defer_dtor(Payload _p, dtor_fn _fn): payload(_p), cleanup(_fn) {}
  ~defer_dtor() { (void) cleanup(payload); }

  defer_dtor(const defer_dtor<Payload,Ignore>&) = delete; // make uncopyable
  defer_dtor& operator=(const defer_dtor<Payload,Ignore> &) = delete; // make unassignable

private:
  Payload payload;
  dtor_fn cleanup;
};
1049 : :
1050 : :
1051 : :
1052 : : ////////////////////////////////////////////////////////////////////////
1053 : :
1054 : :
// Return a copy of STR reduced to a conservative character set:
// alphanumerics plus '/', '.', ',', '_' and ':'.  Everything else is
// dropped.  Used on client-supplied header values before they are
// written to the log.
static std::string
header_censor(const std::string& str)
{
  std::string y;
  y.reserve(str.size());
  for (auto&& x : str)
    {
      // isalnum() is only defined for EOF and unsigned char values;
      // bytes >= 0x80 would be negative as a plain (signed) char.
      if (isalnum(static_cast<unsigned char>(x))
          || x == '/' || x == '.' || x == ',' || x == '_' || x == ':')
        y += x;
    }
  return y;
}
1066 : :
1067 : :
1068 : : static string
1069 : 796 : conninfo (struct MHD_Connection * conn)
1070 : : {
1071 : 796 : char hostname[256]; // RFC1035
1072 : 796 : char servname[256];
1073 : 796 : int sts = -1;
1074 : :
1075 [ - + ]: 796 : if (conn == 0)
1076 : 0 : return "internal";
1077 : :
1078 : : /* Look up client address data. */
1079 : 796 : const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
1080 : : MHD_CONNECTION_INFO_CLIENT_ADDRESS);
1081 [ + - ]: 796 : struct sockaddr *so = u ? u->client_addr : 0;
1082 : :
1083 [ + - - + ]: 796 : if (so && so->sa_family == AF_INET) {
1084 : 0 : sts = getnameinfo (so, sizeof (struct sockaddr_in),
1085 : : hostname, sizeof (hostname),
1086 : : servname, sizeof (servname),
1087 : : NI_NUMERICHOST | NI_NUMERICSERV);
1088 [ + - + - ]: 796 : } else if (so && so->sa_family == AF_INET6) {
1089 : 796 : struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
1090 [ + - + - : 796 : if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
- + ]
1091 : 796 : struct sockaddr_in addr4;
1092 : 796 : memset (&addr4, 0, sizeof(addr4));
1093 : 796 : addr4.sin_family = AF_INET;
1094 : 796 : addr4.sin_port = addr6->sin6_port;
1095 : 796 : memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
1096 : 796 : sts = getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
1097 : : hostname, sizeof (hostname),
1098 : : servname, sizeof (servname),
1099 : : NI_NUMERICHOST | NI_NUMERICSERV);
1100 : : } else {
1101 : 0 : sts = getnameinfo (so, sizeof (struct sockaddr_in6),
1102 : : hostname, sizeof (hostname),
1103 : : servname, sizeof (servname),
1104 : : NI_NUMERICHOST | NI_NUMERICSERV);
1105 : : }
1106 : : }
1107 : :
1108 [ - + ]: 796 : if (sts != 0) {
1109 : 0 : hostname[0] = servname[0] = '\0';
1110 : : }
1111 : :
1112 : : // extract headers relevant to administration
1113 [ - + ]: 796 : const char* user_agent = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
1114 [ + + ]: 796 : const char* x_forwarded_for = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
1115 : : // NB: these are untrustworthy, beware if machine-processing log files
1116 : :
1117 [ + - + - : 2388 : return string(hostname) + string(":") + string(servname) +
+ - + - +
- - + - +
- + - + -
+ - + - -
- - - - -
- - - -
- ]
1118 [ + - + - : 3335 : string(" UA:") + header_censor(string(user_agent)) +
+ - + - +
- - + + +
- + + + -
+ - - - -
- - - - -
- ]
1119 [ + - + - : 2394 : string(" XFF:") + header_censor(string(x_forwarded_for));
+ - + - +
+ + + - -
- - ]
1120 : : }
1121 : :
1122 : :
1123 : :
1124 : : ////////////////////////////////////////////////////////////////////////
1125 : :
1126 : : /* Wrapper for MHD_add_response_header that logs an error if we
1127 : : couldn't add the specified header. */
1128 : : static void
1129 : 2784 : add_mhd_response_header (struct MHD_Response *r,
1130 : : const char *h, const char *v)
1131 : : {
1132 [ - + ]: 2784 : if (MHD_add_response_header (r, h, v) == MHD_NO)
1133 [ # # # # : 0 : obatched(clog) << "Error: couldn't add '" << h << "' header" << endl;
# # # # ]
1134 : 2784 : }
1135 : :
1136 : : static void
1137 : 401 : add_mhd_last_modified (struct MHD_Response *resp, time_t mtime)
1138 : : {
1139 : 401 : struct tm now;
1140 : 401 : struct tm *nowp = gmtime_r (&mtime, &now);
1141 [ + - ]: 401 : if (nowp != NULL)
1142 : : {
1143 : 401 : char datebuf[80];
1144 : 401 : size_t rc = strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %T GMT",
1145 : : nowp);
1146 [ + - ]: 401 : if (rc > 0 && rc < sizeof (datebuf))
1147 : 401 : add_mhd_response_header (resp, "Last-Modified", datebuf);
1148 : : }
1149 : :
1150 : 401 : add_mhd_response_header (resp, "Cache-Control", "public");
1151 : 401 : }
1152 : :
// Quote all questionable characters of STR for safe passage through a
// sh -c expansion: every character other than an alphanumeric or '/'
// is preceded by a backslash.
static std::string
shell_escape(const std::string& str)
{
  std::string y;
  y.reserve(str.size());
  for (auto&& x : str)
    {
      // isalnum() is only defined for EOF and unsigned char values;
      // bytes >= 0x80 would be negative as a plain (signed) char.
      if (! isalnum(static_cast<unsigned char>(x)) && x != '/')
        y += "\\";
      y += x;
    }
  return y;
}
1166 : :
1167 : :
// PR25548: Perform POSIX / RFC3986 style path canonicalization on the input string.
//
// Namely:
//    //         ->   /
//    /foo/../   ->   /
//    /./        ->   /
//
// This mapping is done on dwarf-side source path names, which may
// include these constructs, so we can deal with debuginfod clients
// that accidentally canonicalize the paths.
//
// realpath(3) is close but not quite right, because it also resolves
// symbolic links.  Symlinks at the debuginfod server have nothing to
// do with the build-time symlinks, thus they must not be considered.
//
// see also curl Curl_dedotdotify() aka RFC3986, which we mostly follow here
// see also libc __realpath()
// see also llvm llvm::sys::path::remove_dots()
//
// The input is scanned in place with an index instead of repeatedly
// copying the unconsumed remainder, so the work is linear in the
// length of the path.  Step labels refer to RFC3986 section 5.2.4.
static std::string
canon_pathname (const std::string& input)
{
  const std::string::size_type npos = std::string::npos;
  const std::string::size_type end = input.size();
  std::string::size_type pos = 0; // start of the unconsumed remainder; 5.2.4 (1)
  std::string o;
  o.reserve (end); // only pieces of the input are ever appended

  // Does the remainder start with the K-character string S?
  auto starts_with = [&] (const char* s, std::string::size_type k)
    { return input.compare (pos, k, s) == 0; };
  // Is the remainder exactly the string S?
  auto rest_is = [&] (const char* s)
    { return input.compare (pos, npos, s) == 0; };

  while (pos < end)
    {
      // 5.2.4 (2) A
      if (starts_with ("../", 3))
        pos += 3;
      else if (starts_with ("./", 2))
        pos += 2;

      // 5.2.4 (2) B
      else if (starts_with ("/./", 3))
        pos += 2; // keep the second slash
      else if (rest_is ("/."))
        pos = end; // no need to handle "/." complete-path-segment case; we're dealing with file names

      // 5.2.4 (2) C
      else if (starts_with ("/../", 4))
        {
          pos += 3; // keep the trailing slash
          // drop the last segment emitted so far, if any
          const std::string::size_type sl = o.rfind ('/');
          if (sl != npos)
            o.erase (sl);
          else
            o.clear ();
        }
      else if (rest_is ("/.."))
        pos = end; // no need to handle "/.." complete-path-segment case; we're dealing with file names

      // 5.2.4 (2) D
      // no need to handle these cases; we're dealing with file names
      else if (rest_is ("."))
        pos = end;
      else if (rest_is (".."))
        pos = end;

      // POSIX special: map // to /
      else if (starts_with ("//", 2))
        pos += 1;

      // 5.2.4 (2) E
      else
        {
          // move one segment, including its leading slash if present
          const std::string::size_type next_slash =
            input.find ('/', pos + (input[pos] == '/' ? 1 : 0)); // skip first slash
          if (next_slash == npos)
            {
              o.append (input, pos, npos);
              pos = end;
            }
          else
            {
              o.append (input, pos, next_slash - pos);
              pos = next_slash;
            }
        }
    }

  return o;
}
1241 : :
1242 : :
1243 : : // Estimate available free space for a given filesystem via statfs(2).
1244 : : // Return true if the free fraction is known to be smaller than the
1245 : : // given minimum percentage. Also update a related metric.
1246 : 1954 : bool statfs_free_enough_p(const string& path, const string& label, long minfree = 0)
1247 : : {
1248 : 1954 : struct statfs sfs;
1249 : 1954 : int rc = statfs(path.c_str(), &sfs);
1250 [ + + ]: 1954 : if (rc == 0)
1251 : : {
1252 : 1929 : double s = (double) sfs.f_bavail / (double) sfs.f_blocks;
1253 [ + - + - : 3858 : set_metric("filesys_free_ratio","purpose",label, s);
- + - - ]
1254 : 1929 : return ((s * 100.0) < minfree);
1255 : : }
1256 : : return false;
1257 : : }
1258 : :
1259 : :
1260 : :
1261 : : // A map-like class that owns a cache of file descriptors (indexed by
1262 : : // file / content names).
1263 : : //
1264 : : // If only it could use fd's instead of file names ... but we can't
1265 : : // dup(2) to create independent descriptors for the same unlinked
1266 : : // files, so would have to use some goofy linux /proc/self/fd/%d
1267 : : // hack such as the following
1268 : :
1269 : : #if 0
1270 : : int superdup(int fd)
1271 : : {
1272 : : #ifdef __linux__
1273 : : char *fdpath = NULL;
1274 : : int rc = asprintf(& fdpath, "/proc/self/fd/%d", fd);
1275 : : int newfd;
1276 : : if (rc >= 0)
1277 : : newfd = open(fdpath, O_RDONLY);
1278 : : else
1279 : : newfd = -1;
1280 : : free (fdpath);
1281 : : return newfd;
1282 : : #else
1283 : : return -1;
1284 : : #endif
1285 : : }
1286 : : #endif
1287 : :
1288 : : class libarchive_fdcache
1289 : : {
1290 : : private:
1291 : : mutex fdcache_lock;
1292 : :
1293 : : struct fdcache_entry
1294 : : {
1295 : : string archive;
1296 : : string entry;
1297 : : string fd;
1298 : : double fd_size_mb; // slightly rounded up megabytes
1299 : : };
1300 : : deque<fdcache_entry> lru; // @head: most recently used
1301 : : long max_fds;
1302 : : deque<fdcache_entry> prefetch; // prefetched
1303 : : long max_mbs;
1304 : : long max_prefetch_mbs;
1305 : : long max_prefetch_fds;
1306 : :
1307 : : public:
1308 : 1083 : void set_metrics()
1309 : : {
1310 : 1083 : double fdcache_mb = 0.0;
1311 : 1083 : double prefetch_mb = 0.0;
1312 [ + + + + ]: 5706 : for (auto i = lru.begin(); i < lru.end(); i++)
1313 : 1770 : fdcache_mb += i->fd_size_mb;
1314 [ + + + + ]: 3312 : for (auto j = prefetch.begin(); j < prefetch.end(); j++)
1315 : 573 : prefetch_mb += j->fd_size_mb;
1316 [ + - ]: 2166 : set_metric("fdcache_bytes", fdcache_mb*1024.0*1024.0);
1317 [ + - ]: 2166 : set_metric("fdcache_count", lru.size());
1318 [ + - ]: 2166 : set_metric("fdcache_prefetch_bytes", prefetch_mb*1024.0*1024.0);
1319 [ + - ]: 2166 : set_metric("fdcache_prefetch_count", prefetch.size());
1320 : 1083 : }
1321 : :
1322 : 609 : void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
1323 : : {
1324 : 609 : {
1325 : 609 : unique_lock<mutex> lock(fdcache_lock);
1326 : : // nuke preexisting copy
1327 [ + + + + ]: 2982 : for (auto i = lru.begin(); i < lru.end(); i++)
1328 : : {
1329 [ + + - + ]: 588 : if (i->archive == a && i->entry == b)
1330 : : {
1331 : 0 : unlink (i->fd.c_str());
1332 : 0 : lru.erase(i);
1333 [ # # # # : 0 : inc_metric("fdcache_op_count","op","dequeue");
# # # # #
# # # # #
# # ]
1334 : 0 : break; // must not continue iterating
1335 : : }
1336 : : }
1337 : : // nuke preexisting copy in prefetch
1338 [ + + + + ]: 2043 : for (auto i = prefetch.begin(); i < prefetch.end(); i++)
1339 : : {
1340 [ + + + - ]: 275 : if (i->archive == a && i->entry == b)
1341 : : {
1342 : 0 : unlink (i->fd.c_str());
1343 : 0 : prefetch.erase(i);
1344 [ # # # # : 0 : inc_metric("fdcache_op_count","op","prefetch_dequeue");
# # # # #
# # # # #
# # ]
1345 : 0 : break; // must not continue iterating
1346 : : }
1347 : : }
1348 : 609 : double mb = (sz+65535)/1048576.0; // round up to 64K block
1349 [ + - + - : 1218 : fdcache_entry n = { a, b, fd, mb };
+ - + - ]
1350 [ + + ]: 609 : if (front_p)
1351 : : {
1352 [ + - + - : 666 : inc_metric("fdcache_op_count","op","enqueue");
+ - + - -
+ - + + -
- - - - ]
1353 [ + - ]: 333 : lru.push_front(n);
1354 : : }
1355 : : else
1356 : : {
1357 [ + - + - : 828 : inc_metric("fdcache_op_count","op","prefetch_enqueue");
+ - + - -
+ + - + -
- - - - ]
1358 [ + - ]: 276 : prefetch.push_front(n);
1359 : : }
1360 [ + + ]: 609 : if (verbose > 3)
1361 [ + - + - ]: 1782 : obatched(clog) << "fdcache interned a=" << a << " b=" << b
1362 [ + - + - : 594 : << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
+ - + - +
- + - + -
+ - + - ]
1363 : :
1364 [ + - ]: 609 : set_metrics();
1365 : : }
1366 : :
1367 : : // NB: we age the cache at lookup time too
1368 [ + - - + : 609 : if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp))
- + ]
1369 : : {
1370 [ # # # # : 0 : inc_metric("fdcache_op_count","op","emerg-flush");
# # # # #
# # # #
# ]
1371 [ # # # # ]: 0 : obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl;
1372 : 0 : this->limit(0, 0, 0, 0); // emergency flush
1373 : : }
1374 [ + + ]: 609 : else if (front_p)
1375 : 333 : this->limit(max_fds, max_mbs, max_prefetch_fds, max_prefetch_mbs); // age cache if required
1376 : 609 : }
1377 : :
1378 : 370 : int lookup(const string& a, const string& b)
1379 : : {
1380 : 370 : int fd = -1;
1381 : 370 : {
1382 : 370 : unique_lock<mutex> lock(fdcache_lock);
1383 [ + + + + ]: 2636 : for (auto i = lru.begin(); i < lru.end(); i++)
1384 : : {
1385 [ + + + + ]: 660 : if (i->archive == a && i->entry == b)
1386 : : { // found it; move it to head of lru
1387 [ + - ]: 28 : fdcache_entry n = *i;
1388 : 28 : lru.erase(i); // invalidates i, so no more iteration!
1389 [ + - ]: 28 : lru.push_front(n);
1390 [ + - + - : 56 : inc_metric("fdcache_op_count","op","requeue_front");
+ - + - -
+ - + + -
- - - - ]
1391 [ + - ]: 28 : fd = open(n.fd.c_str(), O_RDONLY);
1392 : 28 : break;
1393 : : }
1394 : : }
1395 : : // Iterate through prefetch while fd == -1 to ensure that no duplication between lru and
1396 : : // prefetch occurs.
1397 [ + + + + : 1246 : for ( auto i = prefetch.begin(); fd == -1 && i < prefetch.end(); ++i)
+ + ]
1398 : : {
1399 [ + + + + ]: 274 : if (i->archive == a && i->entry == b)
1400 : : { // found it; take the entry from the prefetch deque to the lru deque, since it has now been accessed.
1401 [ + - ]: 7 : fdcache_entry n = *i;
1402 : 7 : prefetch.erase(i);
1403 [ + - ]: 7 : lru.push_front(n);
1404 [ + - + - : 14 : inc_metric("fdcache_op_count","op","prefetch_access");
+ - + - -
+ - + + -
- - - - ]
1405 [ + - ]: 7 : fd = open(n.fd.c_str(), O_RDONLY);
1406 : 7 : break;
1407 : : }
1408 : : }
1409 : : }
1410 : :
1411 [ + - - + : 370 : if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp))
- + ]
1412 : : {
1413 [ # # # # : 0 : inc_metric("fdcache_op_count","op","emerg-flush");
# # # # #
# # # #
# ]
1414 [ # # # # ]: 0 : obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl;
1415 : 0 : this->limit(0, 0, 0, 0); // emergency flush
1416 : : }
1417 [ + + ]: 370 : else if (fd >= 0)
1418 : 35 : this->limit(max_fds, max_mbs, max_prefetch_fds, max_prefetch_mbs); // age cache if required
1419 : :
1420 : 370 : return fd;
1421 : : }
1422 : :
1423 : 618 : int probe(const string& a, const string& b) // just a cache residency check - don't modify LRU state, don't open
1424 : : {
1425 : 1236 : unique_lock<mutex> lock(fdcache_lock);
1426 [ + + + + ]: 3093 : for (auto i = lru.begin(); i < lru.end(); i++)
1427 : : {
1428 [ + + + + ]: 632 : if (i->archive == a && i->entry == b)
1429 : : {
1430 [ + - + - : 26 : inc_metric("fdcache_op_count","op","probe_hit");
+ - + - -
+ - + - -
- - - - ]
1431 : 13 : return true;
1432 : : }
1433 : : }
1434 [ + + + + ]: 2029 : for (auto i = prefetch.begin(); i < prefetch.end(); i++)
1435 : : {
1436 [ + + + - ]: 273 : if (i->archive == a && i->entry == b)
1437 : : {
1438 [ # # # # : 0 : inc_metric("fdcache_op_count","op","prefetch_probe_hit");
# # # # #
# # # # #
# # ]
1439 : 0 : return true;
1440 : : }
1441 : : }
1442 [ + - + - : 1210 : inc_metric("fdcache_op_count","op","probe_miss");
+ - + - -
+ - + - -
- - - - ]
1443 : 605 : return false;
1444 : : }
1445 : :
1446 : 0 : void clear(const string& a, const string& b)
1447 : : {
1448 : 0 : unique_lock<mutex> lock(fdcache_lock);
1449 [ # # # # ]: 0 : for (auto i = lru.begin(); i < lru.end(); i++)
1450 : : {
1451 [ # # # # ]: 0 : if (i->archive == a && i->entry == b)
1452 : : { // found it; erase it from lru
1453 [ # # ]: 0 : fdcache_entry n = *i;
1454 : 0 : lru.erase(i); // invalidates i, so no more iteration!
1455 [ # # # # : 0 : inc_metric("fdcache_op_count","op","clear");
# # # # #
# # # # #
# # ]
1456 : 0 : unlink (n.fd.c_str());
1457 [ # # ]: 0 : set_metrics();
1458 : 0 : return;
1459 : : }
1460 : : }
1461 [ # # # # ]: 0 : for (auto i = prefetch.begin(); i < prefetch.end(); i++)
1462 : : {
1463 [ # # # # ]: 0 : if (i->archive == a && i->entry == b)
1464 : : { // found it; erase it from lru
1465 [ # # ]: 0 : fdcache_entry n = *i;
1466 : 0 : prefetch.erase(i); // invalidates i, so no more iteration!
1467 [ # # # # : 0 : inc_metric("fdcache_op_count","op","prefetch_clear");
# # # # #
# # # # #
# # ]
1468 : 0 : unlink (n.fd.c_str());
1469 [ # # ]: 0 : set_metrics();
1470 : 0 : return;
1471 : : }
1472 : : }
1473 : : }
1474 : :
1475 : 510 : void limit(long maxfds, long maxmbs, long maxprefetchfds, long maxprefetchmbs , bool metrics_p = true)
1476 : : {
1477 [ + + + + : 510 : if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
+ + ]
1478 [ + - + - : 188 : obatched(clog) << "fdcache limited to maxfds=" << maxfds << " maxmbs=" << maxmbs << endl;
+ - + - ]
1479 : :
1480 : 510 : unique_lock<mutex> lock(fdcache_lock);
1481 : 510 : this->max_fds = maxfds;
1482 : 510 : this->max_mbs = maxmbs;
1483 : 510 : this->max_prefetch_fds = maxprefetchfds;
1484 : 510 : this->max_prefetch_mbs = maxprefetchmbs;
1485 : 510 : long total_fd = 0;
1486 : 510 : double total_mb = 0.0;
1487 [ + + + + ]: 3567 : for (auto i = lru.begin(); i < lru.end(); i++)
1488 : : {
1489 : : // accumulate totals from most recently used one going backward
1490 : 1120 : total_fd ++;
1491 [ + + ]: 1120 : total_mb += i->fd_size_mb;
1492 [ + + + + ]: 1120 : if (total_fd > this->max_fds || total_mb > this->max_mbs)
1493 : : {
1494 : : // found the cut here point!
1495 : :
1496 [ + + + + ]: 1562 : for (auto j = i; j < lru.end(); j++) // close all the fds from here on in
1497 : : {
1498 [ + + ]: 340 : if (verbose > 3)
1499 [ + - + - : 1336 : obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
+ - ]
1500 [ + - + - : 334 : << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
+ - + - +
- + - +
- ]
1501 [ + + ]: 340 : if (metrics_p)
1502 [ + - + - : 560 : inc_metric("fdcache_op_count","op","evict");
+ - + - -
+ - + - -
- - - - ]
1503 : 340 : unlink (j->fd.c_str());
1504 : : }
1505 : :
1506 : 271 : lru.erase(i, lru.end()); // erase the nodes generally
1507 : 271 : break;
1508 : : }
1509 : : }
1510 : 510 : total_fd = 0;
1511 : 510 : total_mb = 0.0;
1512 [ + + + + ]: 1086 : for(auto i = prefetch.begin(); i < prefetch.end(); i++){
1513 : : // accumulate totals from most recently used one going backward
1514 : 284 : total_fd ++;
1515 [ + + ]: 284 : total_mb += i->fd_size_mb;
1516 [ + + + + ]: 284 : if (total_fd > this->max_prefetch_fds || total_mb > this->max_prefetch_mbs)
1517 : : {
1518 : : // found the cut here point!
1519 [ + + + + ]: 1331 : for (auto j = i; j < prefetch.end(); j++) // close all the fds from here on in
1520 : : {
1521 [ + + ]: 269 : if (verbose > 3)
1522 [ + - + - : 1040 : obatched(clog) << "fdcache evicted from prefetch a=" << j->archive << " b=" << j->entry
+ - ]
1523 [ + - + - : 260 : << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
+ - + - +
- + - +
- ]
1524 [ + + ]: 269 : if (metrics_p)
1525 [ + - + - : 506 : inc_metric("fdcache_op_count","op","prefetch_evict");
+ - + - -
+ - + - -
- - - - ]
1526 : 269 : unlink (j->fd.c_str());
1527 : : }
1528 : :
1529 : 262 : prefetch.erase(i, prefetch.end()); // erase the nodes generally
1530 : 262 : break;
1531 : : }
1532 : : }
1533 [ + + + - ]: 510 : if (metrics_p) set_metrics();
1534 : 510 : }
1535 : :
1536 : :
1537 : 36 : ~libarchive_fdcache()
1538 : 36 : {
1539 : : // unlink any fdcache entries in $TMPDIR
1540 : : // don't update metrics; those globals may be already destroyed
1541 : 36 : limit(0, 0, 0, 0, false);
1542 : 36 : }
1543 : : };
1544 : : static libarchive_fdcache fdcache;
1545 : :
1546 : : /* Search ELF_FD for an ELF/DWARF section with name SECTION.
1547 : : If found copy the section to a temporary file and return
1548 : : its file descriptor, otherwise return -1.
1549 : :
1550 : : The temporary file's mtime will be set to PARENT_MTIME.
1551 : : B_SOURCE should be a description of the parent file suitable
1552 : : for printing to the log. */
1553 : :
1554 : : static int
1555 : 6 : extract_section (int elf_fd, int64_t parent_mtime,
1556 : : const string& b_source, const string& section)
1557 : : {
1558 : : /* Search the fdcache. */
1559 : 6 : struct stat fs;
1560 : 6 : int fd = fdcache.lookup (b_source, section);
1561 [ - + ]: 6 : if (fd >= 0)
1562 : : {
1563 [ # # ]: 0 : if (fstat (fd, &fs) != 0)
1564 : : {
1565 [ # # ]: 0 : if (verbose)
1566 [ # # ]: 0 : obatched (clog) << "cannot fstate fdcache "
1567 [ # # # # : 0 : << b_source << " " << section << endl;
# # ]
1568 : 0 : close (fd);
1569 : 0 : return -1;
1570 : : }
1571 [ # # ]: 0 : if ((int64_t) fs.st_mtime != parent_mtime)
1572 : : {
1573 [ # # ]: 0 : if (verbose)
1574 [ # # ]: 0 : obatched(clog) << "mtime mismatch for "
1575 [ # # # # : 0 : << b_source << " " << section << endl;
# # ]
1576 : 0 : close (fd);
1577 : 0 : return -1;
1578 : : }
1579 : : /* Success. */
1580 : : return fd;
1581 : : }
1582 : :
1583 : 6 : Elf *elf = elf_begin (elf_fd, ELF_C_READ_MMAP_PRIVATE, NULL);
1584 [ + - ]: 6 : if (elf == NULL)
1585 : : return -1;
1586 : :
1587 : : /* Try to find the section and copy the contents into a separate file. */
1588 : 6 : try
1589 : : {
1590 : 6 : size_t shstrndx;
1591 [ + - ]: 6 : int rc = elf_getshdrstrndx (elf, &shstrndx);
1592 [ - + ]: 6 : if (rc < 0)
1593 [ # # # # ]: 0 : throw elfutils_exception (rc, "getshdrstrndx");
1594 : :
1595 : : Elf_Scn *scn = NULL;
1596 : 210 : while (true)
1597 : : {
1598 [ + - ]: 108 : scn = elf_nextscn (elf, scn);
1599 [ + - ]: 108 : if (scn == NULL)
1600 : : break;
1601 : 108 : GElf_Shdr shdr_storage;
1602 [ + - ]: 108 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
1603 [ + - ]: 108 : if (shdr == NULL)
1604 : : break;
1605 : :
1606 [ + - ]: 108 : const char *scn_name = elf_strptr (elf, shstrndx, shdr->sh_name);
1607 [ + - ]: 108 : if (scn_name == NULL)
1608 : : break;
1609 [ + + ]: 108 : if (scn_name == section)
1610 : : {
1611 : 6 : Elf_Data *data = NULL;
1612 : :
1613 : : /* We found the desired section. */
1614 [ + - ]: 6 : data = elf_rawdata (scn, NULL);
1615 [ - + ]: 6 : if (data == NULL)
1616 [ # # # # : 0 : throw elfutils_exception (elf_errno (), "elfraw_data");
# # ]
1617 [ + + ]: 6 : if (data->d_buf == NULL)
1618 : : {
1619 [ + - + - ]: 4 : obatched(clog) << "section " << section
1620 [ + - + - ]: 2 : << " is empty" << endl;
1621 : 2 : break;
1622 : : }
1623 : :
1624 : : /* Create temporary file containing the section. */
1625 : 4 : char *tmppath = NULL;
1626 : 4 : rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir.c_str());
1627 [ - + ]: 4 : if (rc < 0)
1628 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
1629 : 6 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
1630 [ + - ]: 4 : fd = mkstemp (tmppath);
1631 [ - + ]: 4 : if (fd < 0)
1632 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
1633 [ + - ]: 4 : ssize_t res = write_retry (fd, data->d_buf, data->d_size);
1634 [ + - - + ]: 4 : if (res < 0 || (size_t) res != data->d_size)
1635 [ # # # # ]: 0 : throw libc_exception (errno, "cannot write to temporary file");
1636 : :
1637 : : /* Set mtime to be the same as the parent file's mtime. */
1638 : 4 : struct timespec tvs[2];
1639 [ - + ]: 4 : if (fstat (elf_fd, &fs) != 0)
1640 [ # # # # ]: 0 : throw libc_exception (errno, "cannot fstat file");
1641 : :
1642 : 4 : tvs[0].tv_sec = 0;
1643 : 4 : tvs[0].tv_nsec = UTIME_OMIT;
1644 : 4 : tvs[1] = fs.st_mtim;
1645 : 4 : (void) futimens (fd, tvs);
1646 : :
1647 : : /* Add to fdcache. */
1648 [ + - + - ]: 8 : fdcache.intern (b_source, section, tmppath, data->d_size, true);
1649 : 4 : break;
1650 : : }
1651 : 102 : }
1652 : : }
1653 [ - - ]: 0 : catch (const reportable_exception &e)
1654 : : {
1655 [ - - ]: 0 : e.report (clog);
1656 [ - - ]: 0 : close (fd);
1657 : 0 : fd = -1;
1658 : : }
1659 : :
1660 : 6 : elf_end (elf);
1661 : : return fd;
1662 : : }
1663 : :
1664 : : static struct MHD_Response*
1665 : 37 : handle_buildid_f_match (bool internal_req_t,
1666 : : int64_t b_mtime,
1667 : : const string& b_source0,
1668 : : const string& section,
1669 : : int *result_fd)
1670 : : {
1671 : 37 : (void) internal_req_t; // ignored
1672 : 37 : int fd = open(b_source0.c_str(), O_RDONLY);
1673 [ - + ]: 37 : if (fd < 0)
1674 [ # # # # : 0 : throw libc_exception (errno, string("open ") + b_source0);
# # # # #
# ]
1675 : :
1676 : : // NB: use manual close(2) in error case instead of defer_dtor, because
1677 : : // in the normal case, we want to hand the fd over to libmicrohttpd for
1678 : : // file transfer.
1679 : :
1680 : 37 : struct stat s;
1681 : 37 : int rc = fstat(fd, &s);
1682 [ - + ]: 37 : if (rc < 0)
1683 : : {
1684 : 0 : close(fd);
1685 [ # # # # : 0 : throw libc_exception (errno, string("fstat ") + b_source0);
# # # # #
# ]
1686 : : }
1687 : :
1688 [ - + ]: 37 : if ((int64_t) s.st_mtime != b_mtime)
1689 : : {
1690 [ # # ]: 0 : if (verbose)
1691 [ # # # # ]: 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
1692 : 0 : close(fd);
1693 : 0 : return 0;
1694 : : }
1695 : :
1696 [ + + ]: 37 : if (!section.empty ())
1697 : : {
1698 : 3 : int scn_fd = extract_section (fd, s.st_mtime, b_source0, section);
1699 : 3 : close (fd);
1700 : :
1701 [ + + ]: 3 : if (scn_fd >= 0)
1702 : 2 : fd = scn_fd;
1703 : : else
1704 : : {
1705 [ + - ]: 1 : if (verbose)
1706 [ + - ]: 3 : obatched (clog) << "cannot find section " << section
1707 [ + - + - : 1 : << " for " << b_source0 << endl;
+ - ]
1708 : 1 : return 0;
1709 : : }
1710 : :
1711 : 2 : rc = fstat(fd, &s);
1712 [ - + ]: 2 : if (rc < 0)
1713 : : {
1714 : 0 : close (fd);
1715 [ # # # # : 0 : throw libc_exception (errno, string ("fstat ") + b_source0
# # # # #
# # # #
# ]
1716 [ # # # # : 0 : + string (" ") + section);
# # # # #
# # # #
# ]
1717 : : }
1718 : : }
1719 : :
1720 : 36 : struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
1721 [ + - + - : 72 : inc_metric ("http_responses_total","result","file");
+ - - + -
+ - + - -
- - ]
1722 [ - + ]: 36 : if (r == 0)
1723 : : {
1724 [ # # ]: 0 : if (verbose)
1725 [ # # ]: 0 : obatched(clog) << "cannot create fd-response for " << b_source0
1726 [ # # # # : 0 : << " section=" << section << endl;
# # ]
1727 : 0 : close(fd);
1728 : : }
1729 : : else
1730 : : {
1731 : 72 : std::string file = b_source0.substr(b_source0.find_last_of("/")+1, b_source0.length());
1732 [ + - ]: 36 : add_mhd_response_header (r, "Content-Type", "application/octet-stream");
1733 [ + - ]: 36 : add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
1734 [ + - ]: 36 : to_string(s.st_size).c_str());
1735 [ + - ]: 36 : add_mhd_response_header (r, "X-DEBUGINFOD-FILE", file.c_str());
1736 [ + - ]: 36 : add_mhd_last_modified (r, s.st_mtime);
1737 [ + - ]: 36 : if (verbose > 1)
1738 [ + - + - : 72 : obatched(clog) << "serving file " << b_source0 << " section=" << section << endl;
+ - + - +
- - - ]
1739 : : /* libmicrohttpd will close it. */
1740 [ + - ]: 36 : if (result_fd)
1741 : 36 : *result_fd = fd;
1742 : : }
1743 : :
1744 : : return r;
1745 : : }
1746 : :
1747 : : // For security/portability reasons, many distro-package archives have
1748 : : // a "./" in front of path names; others have nothing, others have
1749 : : // "/". Canonicalize them all to a single leading "/", with the
1750 : : // assumption that this matches the dwarf-derived file names too.
1751 : 1751 : string canonicalized_archive_entry_pathname(struct archive_entry *e)
1752 : : {
1753 : 3502 : string fn = archive_entry_pathname(e);
1754 [ - + ]: 1751 : if (fn.size() == 0)
1755 [ # # ]: 0 : return fn;
1756 [ - + ]: 1751 : if (fn[0] == '/')
1757 [ - - + + ]: 1751 : return fn;
1758 [ + + ]: 1751 : if (fn[0] == '.')
1759 [ + - ]: 1148 : return fn.substr(1);
1760 : : else
1761 [ + - + - : 1206 : return string("/")+fn;
- - ]
1762 : : }
1763 : :
1764 : :
1765 : :
1766 : : static struct MHD_Response*
1767 : 393 : handle_buildid_r_match (bool internal_req_p,
1768 : : int64_t b_mtime,
1769 : : const string& b_source0,
1770 : : const string& b_source1,
1771 : : const string& section,
1772 : : int *result_fd)
1773 : : {
1774 : 393 : struct stat fs;
1775 : 393 : int rc = stat (b_source0.c_str(), &fs);
1776 [ + + ]: 393 : if (rc != 0)
1777 [ + - + - : 58 : throw libc_exception (errno, string("stat ") + b_source0);
+ - - + -
- ]
1778 : :
1779 [ - + ]: 364 : if ((int64_t) fs.st_mtime != b_mtime)
1780 : : {
1781 [ # # ]: 0 : if (verbose)
1782 [ # # # # ]: 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
1783 : 0 : return 0;
1784 : : }
1785 : :
1786 : : // check for a match in the fdcache first
1787 : 364 : int fd = fdcache.lookup(b_source0, b_source1);
1788 [ + + ]: 364 : while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
1789 : : {
1790 : 35 : rc = fstat(fd, &fs);
1791 [ - + ]: 35 : if (rc < 0) // disappeared?
1792 : : {
1793 [ # # ]: 0 : if (verbose)
1794 [ # # # # ]: 0 : obatched(clog) << "cannot fstat fdcache " << b_source0 << endl;
1795 : 0 : close(fd);
1796 : 0 : fdcache.clear(b_source0, b_source1);
1797 : : break; // branch out of if "loop", to try new libarchive fetch attempt
1798 : : }
1799 : :
1800 [ + + ]: 35 : if (!section.empty ())
1801 : : {
1802 [ + - + - ]: 1 : int scn_fd = extract_section (fd, fs.st_mtime,
1803 [ + - - + : 2 : b_source0 + ":" + b_source1,
- - ]
1804 : : section);
1805 : 1 : close (fd);
1806 [ - + ]: 1 : if (scn_fd >= 0)
1807 : 0 : fd = scn_fd;
1808 : : else
1809 : : {
1810 [ + - ]: 1 : if (verbose)
1811 [ + - ]: 3 : obatched (clog) << "cannot find section " << section
1812 : : << " for archive " << b_source0
1813 [ + - + - : 1 : << " file " << b_source1 << endl;
+ - + - +
- ]
1814 : 1 : return 0;
1815 : : }
1816 : :
1817 : 0 : rc = fstat(fd, &fs);
1818 [ # # ]: 0 : if (rc < 0)
1819 : : {
1820 : 0 : close (fd);
1821 [ # # ]: 0 : throw libc_exception (errno,
1822 [ # # # # : 0 : string ("fstat archive ") + b_source0 + string (" file ") + b_source1
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
1823 [ # # # # : 0 : + string (" section ") + section);
# # # # #
# # # #
# ]
1824 : : }
1825 : : }
1826 : :
1827 : 34 : struct MHD_Response* r = MHD_create_response_from_fd (fs.st_size, fd);
1828 [ - + ]: 34 : if (r == 0)
1829 : : {
1830 [ # # ]: 0 : if (verbose)
1831 [ # # # # ]: 0 : obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
1832 : 0 : close(fd);
1833 : : break; // branch out of if "loop", to try new libarchive fetch attempt
1834 : : }
1835 : :
1836 [ + - + - : 68 : inc_metric ("http_responses_total","result","archive fdcache");
+ - - + -
+ - - -
- ]
1837 : :
1838 : 34 : add_mhd_response_header (r, "Content-Type", "application/octet-stream");
1839 [ + - ]: 34 : add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
1840 : 34 : to_string(fs.st_size).c_str());
1841 : 34 : add_mhd_response_header (r, "X-DEBUGINFOD-ARCHIVE", b_source0.c_str());
1842 : 34 : add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source1.c_str());
1843 : 34 : add_mhd_last_modified (r, fs.st_mtime);
1844 [ + - ]: 34 : if (verbose > 1)
1845 [ + - ]: 102 : obatched(clog) << "serving fdcache archive " << b_source0
1846 : : << " file " << b_source1
1847 [ + - + - : 34 : << " section=" << section << endl;
+ - + - +
- ]
1848 : : /* libmicrohttpd will close it. */
1849 [ + - ]: 34 : if (result_fd)
1850 : 34 : *result_fd = fd;
1851 : : return r;
1852 : : // NB: see, we never go around the 'loop' more than once
1853 : : }
1854 : :
1855 : : // no match ... grumble, must process the archive
1856 : 693 : string archive_decoder = "/dev/null";
1857 [ + - + + ]: 658 : string archive_extension = "";
1858 [ + + ]: 929 : for (auto&& arch : scan_archives)
1859 [ + + ]: 600 : if (string_endswith(b_source0, arch.first))
1860 : : {
1861 [ + - ]: 329 : archive_extension = arch.first;
1862 [ + - ]: 929 : archive_decoder = arch.second;
1863 : : }
1864 : 329 : FILE* fp;
1865 : 329 : defer_dtor<FILE*,int>::dtor_fn dfn;
1866 [ + + ]: 329 : if (archive_decoder != "cat")
1867 : : {
1868 [ + - + - : 792 : string popen_cmd = archive_decoder + " " + shell_escape(b_source0);
+ - - + -
- - - ]
1869 [ + - ]: 264 : fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
1870 : 264 : dfn = pclose;
1871 [ - + ]: 264 : if (fp == NULL)
1872 [ # # # # : 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
# # # # #
# ]
1873 : : }
1874 : : else
1875 : : {
1876 [ + - ]: 65 : fp = fopen (b_source0.c_str(), "r");
1877 : 65 : dfn = fclose;
1878 [ - + ]: 65 : if (fp == NULL)
1879 [ # # # # : 0 : throw libc_exception (errno, string("fopen ") + b_source0);
# # # # #
# ]
1880 : : }
1881 [ - + ]: 329 : defer_dtor<FILE*,int> fp_closer (fp, dfn);
1882 : :
1883 : 329 : struct archive *a;
1884 [ + - ]: 329 : a = archive_read_new();
1885 [ - + ]: 329 : if (a == NULL)
1886 [ # # # # ]: 0 : throw archive_exception("cannot create archive reader");
1887 : 329 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
1888 : :
1889 [ + - ]: 329 : rc = archive_read_support_format_all(a);
1890 [ - + ]: 329 : if (rc != ARCHIVE_OK)
1891 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all format");
1892 [ + - ]: 329 : rc = archive_read_support_filter_all(a);
1893 [ - + ]: 329 : if (rc != ARCHIVE_OK)
1894 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all filters");
1895 : :
1896 [ + - ]: 329 : rc = archive_read_open_FILE (a, fp);
1897 [ - + ]: 329 : if (rc != ARCHIVE_OK)
1898 : : {
1899 [ # # # # : 0 : obatched(clog) << "cannot open archive from pipe " << b_source0 << endl;
# # ]
1900 [ # # # # ]: 0 : throw archive_exception(a, "cannot open archive from pipe");
1901 : : }
1902 : :
1903 : : // archive traversal is in three stages, no, four stages:
1904 : : // 1) skip entries whose names do not match the requested one
1905 : : // 2) extract the matching entry name (set r = result)
1906 : : // 3) extract some number of prefetched entries (just into fdcache)
1907 : : // 4) abort any further processing
1908 : 329 : struct MHD_Response* r = 0; // will set in stage 2
1909 [ + + ]: 329 : unsigned prefetch_count =
1910 : : internal_req_p ? 0 : fdcache_prefetch; // will decrement in stage 3
1911 : :
1912 [ + + ]: 5072 : while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
1913 : : {
1914 [ + - ]: 5062 : if (interrupted)
1915 : : break;
1916 : :
1917 : 5062 : struct archive_entry *e;
1918 [ + - ]: 5062 : rc = archive_read_next_header (a, &e);
1919 [ + + ]: 5062 : if (rc != ARCHIVE_OK)
1920 : : break;
1921 : :
1922 [ + - + + ]: 4743 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
1923 : 3329 : continue;
1924 : :
1925 [ + - ]: 1414 : string fn = canonicalized_archive_entry_pathname (e);
1926 [ + + + + ]: 1414 : if ((r == 0) && (fn != b_source1)) // stage 1
1927 : 5540 : continue;
1928 : :
1929 [ + - + + ]: 617 : if (fdcache.probe (b_source0, fn) && // skip if already interned
1930 [ - + ]: 12 : fn != b_source1) // but only if we'd just be prefetching, PR29474
1931 : 12 : continue;
1932 : :
1933 : : // extract this file to a temporary file
1934 : 605 : char* tmppath = NULL;
1935 : 605 : rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir.c_str());
1936 [ - + ]: 605 : if (rc < 0)
1937 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
1938 : 605 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
1939 [ + - ]: 605 : fd = mkstemp (tmppath);
1940 [ - + ]: 605 : if (fd < 0)
1941 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
1942 : : // NB: don't unlink (tmppath), as fdcache will take charge of it.
1943 : :
1944 : : // NB: this can take many uninterruptible seconds for a huge file
1945 [ + - ]: 605 : rc = archive_read_data_into_fd (a, fd);
1946 [ - + ]: 605 : if (rc != ARCHIVE_OK) // e.g. ENOSPC!
1947 : : {
1948 [ # # ]: 0 : close (fd);
1949 : 0 : unlink (tmppath);
1950 [ # # # # ]: 0 : throw archive_exception(a, "cannot extract file");
1951 : : }
1952 : :
1953 : : // Set the mtime so the fdcache file mtimes, even prefetched ones,
1954 : : // propagate to future webapi clients.
1955 : 605 : struct timespec tvs[2];
1956 : 605 : tvs[0].tv_sec = 0;
1957 : 605 : tvs[0].tv_nsec = UTIME_OMIT;
1958 [ + - ]: 605 : tvs[1].tv_sec = archive_entry_mtime(e);
1959 [ + - ]: 605 : tvs[1].tv_nsec = archive_entry_mtime_nsec(e);
1960 : 605 : (void) futimens (fd, tvs); /* best effort */
1961 : :
1962 [ + + ]: 605 : if (r != 0) // stage 3
1963 : : {
1964 : : // NB: now we know we have a complete reusable file; make fdcache
1965 : : // responsible for unlinking it later.
1966 [ + - + - : 276 : fdcache.intern(b_source0, fn,
+ - ]
1967 : : tmppath, archive_entry_size(e),
1968 [ + - + - ]: 552 : false); // prefetched ones go to the prefetch cache
1969 : 276 : prefetch_count --;
1970 [ + - ]: 276 : close (fd); // we're not saving this fd to make a mhd-response from!
1971 [ + + ]: 1690 : continue;
1972 : : }
1973 : :
1974 : : // NB: now we know we have a complete reusable file; make fdcache
1975 : : // responsible for unlinking it later.
1976 [ + - + - : 329 : fdcache.intern(b_source0, b_source1,
+ - ]
1977 : : tmppath, archive_entry_size(e),
1978 [ + - + + ]: 658 : true); // requested ones go to the front of lru
1979 : :
1980 [ + + ]: 329 : if (!section.empty ())
1981 : : {
1982 [ + - + - ]: 2 : int scn_fd = extract_section (fd, b_mtime,
1983 [ + - + - : 4 : b_source0 + ":" + b_source1,
- + - - ]
1984 : : section);
1985 [ + - ]: 2 : close (fd);
1986 [ + - ]: 2 : if (scn_fd >= 0)
1987 : 2 : fd = scn_fd;
1988 : : else
1989 : : {
1990 [ # # ]: 0 : if (verbose)
1991 [ # # # # ]: 0 : obatched (clog) << "cannot find section " << section
1992 : : << " for archive " << b_source0
1993 [ # # # # : 0 : << " file " << b_source1 << endl;
# # # # #
# ]
1994 [ # # ]: 0 : return 0;
1995 : : }
1996 : :
1997 : 2 : rc = fstat(fd, &fs);
1998 [ - + ]: 2 : if (rc < 0)
1999 : : {
2000 [ # # ]: 0 : close (fd);
2001 [ # # ]: 0 : throw libc_exception (errno,
2002 [ # # # # : 0 : string ("fstat ") + b_source0 + string (" ") + section);
# # # # #
# # # # #
# # # # #
# # # # #
# # ]
2003 : : }
2004 [ + - ]: 2 : r = MHD_create_response_from_fd (fs.st_size, fd);
2005 : : }
2006 : : else
2007 [ + - + - ]: 327 : r = MHD_create_response_from_fd (archive_entry_size(e), fd);
2008 : :
2009 [ + - + - : 658 : inc_metric ("http_responses_total","result",archive_extension + " archive");
+ - + - -
+ + + - -
- - ]
2010 [ - + ]: 329 : if (r == 0)
2011 : : {
2012 [ # # ]: 0 : if (verbose)
2013 [ # # # # : 0 : obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
# # ]
2014 [ # # ]: 0 : close(fd);
2015 [ # # ]: 0 : break; // assume no chance of better luck around another iteration; no other copies of same file
2016 : : }
2017 : : else
2018 : : {
2019 [ + - ]: 658 : std::string file = b_source1.substr(b_source1.find_last_of("/")+1, b_source1.length());
2020 [ + - ]: 329 : add_mhd_response_header (r, "Content-Type",
2021 : : "application/octet-stream");
2022 [ + - ]: 329 : add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
2023 [ + - + - ]: 329 : to_string(archive_entry_size(e)).c_str());
2024 [ + - ]: 329 : add_mhd_response_header (r, "X-DEBUGINFOD-ARCHIVE",
2025 : : b_source0.c_str());
2026 [ + - ]: 329 : add_mhd_response_header (r, "X-DEBUGINFOD-FILE", file.c_str());
2027 [ + - + - ]: 329 : add_mhd_last_modified (r, archive_entry_mtime(e));
2028 [ + - ]: 329 : if (verbose > 1)
2029 [ + - + - : 987 : obatched(clog) << "serving archive " << b_source0
- - ]
2030 : : << " file " << b_source1
2031 [ + - + - : 329 : << " section=" << section << endl;
+ - + - +
- ]
2032 : : /* libmicrohttpd will close it. */
2033 [ + - ]: 329 : if (result_fd)
2034 : 329 : *result_fd = fd;
2035 [ + + ]: 329 : continue;
2036 : : }
2037 : : }
2038 : :
2039 : : // XXX: rpm/file not found: delete this R entry?
2040 : : return r;
2041 : : }
2042 : :
2043 : :
2044 : : static struct MHD_Response*
2045 : 430 : handle_buildid_match (bool internal_req_p,
2046 : : int64_t b_mtime,
2047 : : const string& b_stype,
2048 : : const string& b_source0,
2049 : : const string& b_source1,
2050 : : const string& section,
2051 : : int *result_fd)
2052 : : {
2053 : 430 : try
2054 : : {
2055 [ + + ]: 430 : if (b_stype == "F")
2056 [ + - ]: 37 : return handle_buildid_f_match(internal_req_p, b_mtime, b_source0,
2057 : : section, result_fd);
2058 [ + - ]: 393 : else if (b_stype == "R")
2059 [ + + ]: 393 : return handle_buildid_r_match(internal_req_p, b_mtime, b_source0,
2060 : : b_source1, section, result_fd);
2061 : : }
2062 [ - + ]: 58 : catch (const reportable_exception &e)
2063 : : {
2064 [ + - ]: 29 : e.report(clog);
2065 : : // Report but swallow libc etc. errors here; let the caller
2066 : : // iterate to other matches of the content.
2067 : : }
2068 : :
2069 : : return 0;
2070 : : }
2071 : :
2072 : :
2073 : : static int
2074 : 96 : debuginfod_find_progress (debuginfod_client *, long a, long b)
2075 : : {
2076 [ - + ]: 96 : if (verbose > 4)
2077 [ # # # # : 0 : obatched(clog) << "federated debuginfod progress=" << a << "/" << b << endl;
# # # # #
# ]
2078 : :
2079 : 96 : return interrupted;
2080 : : }
2081 : :
2082 : :
2083 : : // a little lru pool of debuginfod_client*s for reuse between query threads
2084 : :
2085 : : mutex dc_pool_lock;
2086 : : deque<debuginfod_client*> dc_pool;
2087 : :
2088 : 70 : debuginfod_client* debuginfod_pool_begin()
2089 : : {
2090 : 140 : unique_lock<mutex> lock(dc_pool_lock);
2091 [ + + ]: 70 : if (dc_pool.size() > 0)
2092 : : {
2093 [ + - + - : 114 : inc_metric("dc_pool_op_count","op","begin-reuse");
+ - + - -
+ - + - -
- - - - ]
2094 : 57 : debuginfod_client *c = dc_pool.front();
2095 : 57 : dc_pool.pop_front();
2096 : 57 : return c;
2097 : : }
2098 [ + - + - : 26 : inc_metric("dc_pool_op_count","op","begin-new");
+ - + - -
+ - + + -
- - - - -
- ]
2099 [ + - ]: 13 : return debuginfod_begin();
2100 : : }
2101 : :
2102 : :
2103 : 71 : void debuginfod_pool_groom()
2104 : : {
2105 : 71 : unique_lock<mutex> lock(dc_pool_lock);
2106 [ + + ]: 84 : while (dc_pool.size() > 0)
2107 : : {
2108 [ + - + - : 26 : inc_metric("dc_pool_op_count","op","end");
+ - + - -
+ - + + -
- - - - -
- ]
2109 [ + - ]: 13 : debuginfod_end(dc_pool.front());
2110 : 13 : dc_pool.pop_front();
2111 : : }
2112 : 71 : }
2113 : :
2114 : :
2115 : 70 : void debuginfod_pool_end(debuginfod_client* c)
2116 : : {
2117 : 70 : unique_lock<mutex> lock(dc_pool_lock);
2118 [ + - + - : 140 : inc_metric("dc_pool_op_count","op","end-save");
+ - + - -
+ - + + -
- - - - -
- ]
2119 [ + - ]: 70 : dc_pool.push_front(c); // accelerate reuse, vs. push_back
2120 : 70 : }
2121 : :
2122 : :
2123 : : static struct MHD_Response*
2124 : 472 : handle_buildid (MHD_Connection* conn,
2125 : : const string& buildid /* unsafe */,
2126 : : string& artifacttype /* unsafe, cleanse on exception/return */,
2127 : : const string& suffix /* unsafe */,
2128 : : int *result_fd)
2129 : : {
2130 : : // validate artifacttype
2131 : 873 : string atype_code;
2132 [ + + + - ]: 472 : if (artifacttype == "debuginfo") atype_code = "D";
2133 [ + + + - ]: 103 : else if (artifacttype == "executable") atype_code = "E";
2134 [ + + + - ]: 34 : else if (artifacttype == "source") atype_code = "S";
2135 [ + + + - ]: 6 : else if (artifacttype == "section") atype_code = "I";
2136 : : else {
2137 [ + - ]: 2 : artifacttype = "invalid"; // PR28242 ensure http_resposes metrics don't propagate unclean user data
2138 [ + - + - ]: 6 : throw reportable_exception("invalid artifacttype");
2139 : : }
2140 : :
2141 [ + + ]: 470 : if (conn != 0)
2142 [ + - + - : 971 : inc_metric("http_requests_total", "type", artifacttype);
+ - - + -
- - + ]
2143 : :
2144 [ - + ]: 940 : string section;
2145 [ + + ]: 470 : if (atype_code == "I")
2146 : : {
2147 [ - + ]: 4 : if (suffix.size () < 2)
2148 [ # # # # ]: 0 : throw reportable_exception ("invalid section suffix");
2149 : :
2150 : : // Remove leading '/'
2151 [ + - - + ]: 4 : section = suffix.substr(1);
2152 : : }
2153 : :
2154 [ + + - + ]: 498 : if (atype_code == "S" && suffix == "")
2155 [ # # # # ]: 0 : throw reportable_exception("invalid source suffix");
2156 : :
2157 : : // validate buildid
2158 [ + + ]: 470 : if ((buildid.size() < 2) || // not empty
2159 [ + + + - : 939 : (buildid.size() % 2) || // even number
+ - ]
2160 : 469 : (buildid.find_first_not_of("0123456789abcdef") != string::npos)) // pure tasty lowercase hex
2161 [ + - - + ]: 2 : throw reportable_exception("invalid buildid");
2162 : :
2163 [ + - ]: 469 : if (verbose > 1)
2164 [ + - + - ]: 1407 : obatched(clog) << "searching for buildid=" << buildid << " artifacttype=" << artifacttype
2165 [ + - + - : 469 : << " suffix=" << suffix << endl;
+ - + - +
- ]
2166 : :
2167 : : // If invoked from the scanner threads, use the scanners' read-write
2168 : : // connection. Otherwise use the web query threads' read-only connection.
2169 [ + + ]: 469 : sqlite3 *thisdb = (conn == 0) ? db : dbq;
2170 : :
2171 : 469 : sqlite_ps *pp = 0;
2172 : :
2173 [ + + ]: 469 : if (atype_code == "D")
2174 : : {
2175 [ + - + - ]: 738 : pp = new sqlite_ps (thisdb, "mhd-query-d",
2176 : : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_d where buildid = ? "
2177 [ + - + - : 807 : "order by mtime desc");
+ - + - -
+ + - - -
- + ]
2178 [ + - ]: 369 : pp->reset();
2179 [ + - ]: 369 : pp->bind(1, buildid);
2180 : : }
2181 [ + + ]: 100 : else if (atype_code == "E")
2182 : : {
2183 [ + - + - ]: 136 : pp = new sqlite_ps (thisdb, "mhd-query-e",
2184 : : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_e where buildid = ? "
2185 [ + - + - : 136 : "order by mtime desc");
+ - + - -
+ + - -
- ]
2186 [ + - ]: 68 : pp->reset();
2187 [ + - ]: 68 : pp->bind(1, buildid);
2188 : : }
2189 [ + + ]: 32 : else if (atype_code == "S")
2190 : : {
2191 : : // PR25548
2192 : : // Incoming source queries may come in with either dwarf-level OR canonicalized paths.
2193 : : // We let the query pass with either one.
2194 : :
2195 [ + - + - ]: 56 : pp = new sqlite_ps (thisdb, "mhd-query-s",
2196 : : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc in (?,?) "
2197 [ + - + - : 56 : "order by sharedprefix(source0,source0ref) desc, mtime desc");
+ - + - -
+ + - -
- ]
2198 [ + - ]: 28 : pp->reset();
2199 [ + - ]: 28 : pp->bind(1, buildid);
2200 : : // NB: we don't store the non-canonicalized path names any more, but old databases
2201 : : // might have them (and no canon ones), so we keep searching for both.
2202 [ + - ]: 28 : pp->bind(2, suffix);
2203 [ + - + - ]: 56 : pp->bind(3, canon_pathname(suffix));
2204 : : }
2205 [ + - ]: 4 : else if (atype_code == "I")
2206 : : {
2207 [ + - + - ]: 8 : pp = new sqlite_ps (thisdb, "mhd-query-i",
2208 : : "select mtime, sourcetype, source0, source1, 1 as debug_p from " BUILDIDS "_query_d where buildid = ? "
2209 : : "union all "
2210 : : "select mtime, sourcetype, source0, source1, 0 as debug_p from " BUILDIDS "_query_e where buildid = ? "
2211 [ + - + - : 77 : "order by debug_p desc, mtime desc");
+ - + - -
+ + - - -
- + ]
2212 [ + - ]: 4 : pp->reset();
2213 [ + - ]: 4 : pp->bind(1, buildid);
2214 [ + - ]: 4 : pp->bind(2, buildid);
2215 : : }
2216 [ - + ]: 938 : unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
2217 : :
2218 : 469 : bool do_upstream_section_query = true;
2219 : :
2220 : : // consume all the rows
2221 : 500 : while (1)
2222 : : {
2223 [ + - ]: 500 : int rc = pp->step();
2224 [ + + ]: 500 : if (rc == SQLITE_DONE) break;
2225 [ - + ]: 430 : if (rc != SQLITE_ROW)
2226 [ # # # # ]: 0 : throw sqlite_exception(rc, "step");
2227 : :
2228 [ + - ]: 430 : int64_t b_mtime = sqlite3_column_int64 (*pp, 0);
2229 [ + - - + : 461 : string b_stype = string((const char*) sqlite3_column_text (*pp, 1) ?: ""); /* by DDL may not be NULL */
+ - ]
2230 [ + - - + : 461 : string b_source0 = string((const char*) sqlite3_column_text (*pp, 2) ?: ""); /* may be NULL */
+ - - + ]
2231 [ + - + + : 498 : string b_source1 = string((const char*) sqlite3_column_text (*pp, 3) ?: ""); /* may be NULL */
+ - + - ]
2232 : :
2233 [ + - ]: 430 : if (verbose > 1)
2234 [ + - + - : 1290 : obatched(clog) << "found mtime=" << b_mtime << " stype=" << b_stype
- - ]
2235 [ + - + - : 430 : << " source0=" << b_source0 << " source1=" << b_source1 << endl;
+ - + - +
- + - +
- ]
2236 : :
2237 : : // Try accessing the located match.
2238 : : // XXX: in case of multiple matches, attempt them in parallel?
2239 [ + - ]: 430 : auto r = handle_buildid_match (conn ? false : true,
2240 : : b_mtime, b_stype, b_source0, b_source1,
2241 : : section, result_fd);
2242 [ + + ]: 430 : if (r)
2243 [ + + + - ]: 762 : return r;
2244 : :
2245 : : // If a debuginfo file matching BUILDID was found but didn't contain
2246 : : // the desired section, then the section should not exist. Don't
2247 : : // bother querying upstream servers.
2248 [ + + + - : 31 : if (!section.empty () && (sqlite3_column_int (*pp, 4) == 1))
+ - ]
2249 : : {
2250 : 2 : struct stat st;
2251 : :
2252 : : // For "F" sourcetype, check if the debuginfo exists. For "R"
2253 : : // sourcetype, check if the debuginfo was interned into the fdcache.
2254 [ - + ]: 3 : if ((b_stype == "F" && (stat (b_source0.c_str (), &st) == 0))
2255 [ + + + - : 3 : || (b_stype == "R" && fdcache.probe (b_source0, b_source1)))
+ - + - ]
2256 : : do_upstream_section_query = false;
2257 : : }
2258 : : }
2259 [ + - ]: 70 : pp->reset();
2260 : :
2261 [ - + ]: 70 : if (!do_upstream_section_query)
2262 [ # # # # ]: 0 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
2263 : :
2264 : : // We couldn't find it in the database. Last ditch effort
2265 : : // is to defer to other debuginfo servers.
2266 : :
2267 : 70 : int fd = -1;
2268 [ + - ]: 70 : debuginfod_client *client = debuginfod_pool_begin ();
2269 [ - + ]: 70 : if (client == NULL)
2270 [ # # # # ]: 0 : throw libc_exception(errno, "debuginfod client pool alloc");
2271 : 469 : defer_dtor<debuginfod_client*,void> client_closer (client, debuginfod_pool_end);
2272 : :
2273 [ + - ]: 70 : debuginfod_set_progressfn (client, & debuginfod_find_progress);
2274 : :
2275 [ + - ]: 70 : if (conn)
2276 : : {
2277 : : // Transcribe incoming User-Agent:
2278 [ + - - + : 140 : string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
+ - ]
2279 [ + - + - : 210 : string ua_complete = string("User-Agent: ") + ua;
+ - + + +
- ]
2280 [ + - ]: 70 : debuginfod_add_http_header (client, ua_complete.c_str());
2281 : :
2282 : : // Compute larger XFF:, for avoiding info loss during
2283 : : // federation, and for future cyclicity detection.
2284 [ + - + + : 201 : string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
+ - + - ]
2285 [ + + ]: 70 : if (xff != "")
2286 [ + - - + : 20 : xff += string(", "); // comma separated list
- + ]
2287 : :
2288 : 70 : unsigned int xff_count = 0;
2289 [ + + ]: 190 : for (auto&& i : xff){
2290 [ + + ]: 120 : if (i == ',') xff_count++;
2291 : : }
2292 : :
2293 : : // if X-Forwarded-For: exceeds N hops,
2294 : : // do not delegate a local lookup miss to upstream debuginfods.
2295 [ + + ]: 70 : if (xff_count >= forwarded_ttl_limit)
2296 [ + - ]: 2 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
2297 [ + - ]: 4 : and will not query the upstream servers");
2298 : :
2299 : : // Compute the client's numeric IP address only - so can't merge with conninfo()
2300 [ + - ]: 68 : const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
2301 : : MHD_CONNECTION_INFO_CLIENT_ADDRESS);
2302 [ + - ]: 68 : struct sockaddr *so = u ? u->client_addr : 0;
2303 : 68 : char hostname[256] = ""; // RFC1035
2304 [ + - - + ]: 68 : if (so && so->sa_family == AF_INET) {
2305 [ # # ]: 0 : (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
2306 : : NI_NUMERICHOST);
2307 [ + - + - ]: 68 : } else if (so && so->sa_family == AF_INET6) {
2308 : 68 : struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
2309 [ + - + - : 68 : if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
- + ]
2310 : 68 : struct sockaddr_in addr4;
2311 [ + - ]: 68 : memset (&addr4, 0, sizeof(addr4));
2312 : 68 : addr4.sin_family = AF_INET;
2313 : 68 : addr4.sin_port = addr6->sin6_port;
2314 [ + - ]: 68 : memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
2315 [ + - ]: 68 : (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
2316 : : hostname, sizeof (hostname), NULL, 0,
2317 : : NI_NUMERICHOST);
2318 : : } else {
2319 [ # # ]: 0 : (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
2320 : : NI_NUMERICHOST);
2321 : : }
2322 : : }
2323 : :
2324 [ + - + - : 206 : string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
+ - + - -
+ - + + -
+ + - - -
- - + ]
2325 [ + - ]: 68 : debuginfod_add_http_header (client, xff_complete.c_str());
2326 : : }
2327 : :
2328 [ + + ]: 68 : if (artifacttype == "debuginfo")
2329 [ + - ]: 34 : fd = debuginfod_find_debuginfo (client,
2330 [ + - ]: 34 : (const unsigned char*) buildid.c_str(),
2331 : : 0, NULL);
2332 [ + + ]: 34 : else if (artifacttype == "executable")
2333 [ + - ]: 33 : fd = debuginfod_find_executable (client,
2334 [ + - ]: 33 : (const unsigned char*) buildid.c_str(),
2335 : : 0, NULL);
2336 [ + - ]: 1 : else if (artifacttype == "source")
2337 [ + - ]: 1 : fd = debuginfod_find_source (client,
2338 [ + - ]: 1 : (const unsigned char*) buildid.c_str(),
2339 : : 0, suffix.c_str(), NULL);
2340 [ # # ]: 0 : else if (artifacttype == "section")
2341 [ # # ]: 0 : fd = debuginfod_find_section (client,
2342 [ # # ]: 0 : (const unsigned char*) buildid.c_str(),
2343 : : 0, section.c_str(), NULL);
2344 : :
2345 [ + + ]: 68 : if (fd >= 0)
2346 : : {
2347 [ + - ]: 2 : if (conn != 0)
2348 [ + - + - : 72 : inc_metric ("http_responses_total","result","upstream");
+ - + - -
+ - + - -
- - ]
2349 : 2 : struct stat s;
2350 : 2 : int rc = fstat (fd, &s);
2351 [ + - ]: 2 : if (rc == 0)
2352 : : {
2353 [ + - ]: 2 : auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
2354 [ + - ]: 2 : if (r)
2355 : : {
2356 [ + - ]: 2 : add_mhd_response_header (r, "Content-Type",
2357 : : "application/octet-stream");
2358 : : // Copy the incoming headers
2359 [ + - ]: 2 : const char * hdrs = debuginfod_get_headers(client);
2360 [ + - ]: 4 : string header_dup;
2361 [ + - ]: 2 : if (hdrs)
2362 [ + - - + ]: 2 : header_dup = string(hdrs);
2363 : : // Parse the "header: value\n" lines into (h,v) tuples and pass on
2364 : 10 : while(1)
2365 : : {
2366 : 6 : size_t newline = header_dup.find('\n');
2367 [ + + ]: 6 : if (newline == string::npos) break;
2368 : 4 : size_t colon = header_dup.find(':');
2369 [ + - ]: 4 : if (colon == string::npos) break;
2370 [ + - ]: 4 : string header = header_dup.substr(0,colon);
2371 [ + - + - ]: 8 : string value = header_dup.substr(colon+1,newline-colon-1);
2372 : : // strip leading spaces from value
2373 : 4 : size_t nonspace = value.find_first_not_of(" ");
2374 [ + - ]: 4 : if (nonspace != string::npos)
2375 [ + - - + ]: 4 : value = value.substr(nonspace);
2376 [ + - ]: 4 : add_mhd_response_header(r, header.c_str(), value.c_str());
2377 [ + - + + : 6 : header_dup = header_dup.substr(newline+1);
- + ]
2378 : 4 : }
2379 : :
2380 [ + - ]: 2 : add_mhd_last_modified (r, s.st_mtime);
2381 [ + - ]: 2 : if (verbose > 1)
2382 [ + - + - : 4 : obatched(clog) << "serving file from upstream debuginfod/cache" << endl;
- - ]
2383 [ + - ]: 2 : if (result_fd)
2384 : 2 : *result_fd = fd;
2385 [ + - ]: 2 : return r; // NB: don't close fd; libmicrohttpd will
2386 : : }
2387 : : }
2388 [ # # ]: 0 : close (fd);
2389 : : }
2390 : : else
2391 [ + + ]: 66 : switch(fd)
2392 : : {
2393 : : case -ENOSYS:
2394 : : break;
2395 : : case -ENOENT:
2396 : : break;
2397 : 26 : default: // some more tricky error
2398 [ + - + - ]: 52 : throw libc_exception(-fd, "upstream debuginfod query failed");
2399 : : }
2400 : :
2401 [ + - - + ]: 80 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
2402 : : }
2403 : :
2404 : :
2405 : : ////////////////////////////////////////////////////////////////////////
2406 : :
2407 : : static map<string,double> metrics; // arbitrary data for /metrics query
2408 : : // NB: values are stored as double: most metrics are integer counts, but durations are fractional; prometheus accepts double
2409 : : static mutex metrics_lock;
2410 : : // NB: these objects get released during the process exit via global dtors
2411 : : // do not call them from within other global dtors
2412 : :
// Build one prometheus label of the form  name="escaped-value".
// Backslash, double-quote and newline in VALUE are escaped as the
// text exposition format requires; every other character is copied
// through unchanged.  NAME is emitted verbatim.
// https://prometheus.io/docs/instrumenting/exposition_formats/
static string
metric_label(const string& name, const string& value)
{
  string label;
  label.reserve (name.size() + value.size() + 3); // room for ="..." in the no-escape case
  label.append (name).append ("=\"");

  for (const char ch : value)
    {
      if (ch == '\\')
        label += "\\\\";
      else if (ch == '"')
        label += "\\\"";
      else if (ch == '\n')
        label += "\\n";
      else
        label += ch;
    }

  label += '"';
  return label;
}
2431 : :
2432 : :
2433 : : // add prometheus-format metric name + label tuple (if any) + value
2434 : :
2435 : : static void
2436 : 4404 : set_metric(const string& metric, double value)
2437 : : {
2438 : 4404 : unique_lock<mutex> lock(metrics_lock);
2439 [ + - + - ]: 4404 : metrics[metric] = value;
2440 : 4404 : }
2441 : : #if 0 /* unused */
2442 : : static void
2443 : : inc_metric(const string& metric)
2444 : : {
2445 : : unique_lock<mutex> lock(metrics_lock);
2446 : : metrics[metric] ++;
2447 : : }
2448 : : #endif
2449 : : static void
2450 : 3523 : set_metric(const string& metric,
2451 : : const string& lname, const string& lvalue,
2452 : : double value)
2453 : : {
2454 [ + - + - : 7046 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
+ - - + +
+ - - -
- ]
2455 [ + - + - ]: 7046 : unique_lock<mutex> lock(metrics_lock);
2456 [ + - + - ]: 3523 : metrics[key] = value;
2457 : 3523 : }
2458 : :
2459 : : static void
2460 : 23726 : inc_metric(const string& metric,
2461 : : const string& lname, const string& lvalue)
2462 : : {
2463 [ + - + - : 47520 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
+ - + + +
+ - - -
- ]
2464 [ + - + - ]: 47456 : unique_lock<mutex> lock(metrics_lock);
2465 [ + - + - ]: 23729 : metrics[key] ++;
2466 : 23728 : }
2467 : : static void
2468 : 22058 : add_metric(const string& metric,
2469 : : const string& lname, const string& lvalue,
2470 : : double value)
2471 : : {
2472 [ + - + - : 44183 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
+ - + + +
+ - - -
- ]
2473 [ + - + - ]: 44123 : unique_lock<mutex> lock(metrics_lock);
2474 [ + - + - ]: 22064 : metrics[key] += value;
2475 : 22063 : }
2476 : : #if 0
2477 : : static void
2478 : : add_metric(const string& metric,
2479 : : double value)
2480 : : {
2481 : : unique_lock<mutex> lock(metrics_lock);
2482 : : metrics[metric] += value;
2483 : : }
2484 : : #endif
2485 : :
2486 : :
2487 : : // and more for higher arity labels if needed
2488 : :
2489 : : static void
2490 : 2388 : inc_metric(const string& metric,
2491 : : const string& lname, const string& lvalue,
2492 : : const string& rname, const string& rvalue)
2493 : : {
2494 [ + - - + : 4776 : string key = (metric + "{"
- - ]
2495 [ + - + - : 9552 : + metric_label(lname, lvalue) + ","
+ - - + -
+ + + - -
- - - - ]
2496 [ + - + - : 7164 : + metric_label(rname, rvalue) + "}");
- + - - ]
2497 [ + - + - ]: 4776 : unique_lock<mutex> lock(metrics_lock);
2498 [ + - + - ]: 2388 : metrics[key] ++;
2499 : 2388 : }
2500 : : static void
2501 : 2388 : add_metric(const string& metric,
2502 : : const string& lname, const string& lvalue,
2503 : : const string& rname, const string& rvalue,
2504 : : double value)
2505 : : {
2506 [ + - - + : 4776 : string key = (metric + "{"
- - ]
2507 [ + - + - : 9552 : + metric_label(lname, lvalue) + ","
+ - - + -
+ + + - -
- - - - ]
2508 [ + - + - : 7164 : + metric_label(rname, rvalue) + "}");
- + - - ]
2509 [ + - + - ]: 4776 : unique_lock<mutex> lock(metrics_lock);
2510 [ + - + - ]: 2388 : metrics[key] += value;
2511 : 2388 : }
2512 : :
2513 : : static struct MHD_Response*
2514 : 341 : handle_metrics (off_t* size)
2515 : : {
2516 : 341 : stringstream o;
2517 : 341 : {
2518 [ + - ]: 341 : unique_lock<mutex> lock(metrics_lock);
2519 [ + + ]: 29414 : for (auto&& i : metrics)
2520 [ + - ]: 29073 : o << i.first
2521 : : << " "
2522 [ + - + - ]: 29073 : << std::setprecision(std::numeric_limits<double>::digits10 + 1)
2523 [ + - + - ]: 29073 : << i.second
2524 : 29073 : << endl;
2525 : : }
2526 [ + - ]: 682 : const string& os = o.str();
2527 [ + - ]: 341 : MHD_Response* r = MHD_create_response_from_buffer (os.size(),
2528 [ + - ]: 341 : (void*) os.c_str(),
2529 : : MHD_RESPMEM_MUST_COPY);
2530 [ + - ]: 341 : if (r != NULL)
2531 : : {
2532 [ + - ]: 341 : *size = os.size();
2533 [ + - ]: 341 : add_mhd_response_header (r, "Content-Type", "text/plain");
2534 : : }
2535 [ + - ]: 682 : return r;
2536 : : }
2537 : :
2538 : : static struct MHD_Response*
2539 : 0 : handle_root (off_t* size)
2540 : : {
2541 [ # # # # : 0 : static string version = "debuginfod (" + string (PACKAGE_NAME) + ") "
# # # # #
# # # # #
# # # # ]
2542 [ # # # # : 0 : + string (PACKAGE_VERSION);
# # # # #
# ]
2543 : 0 : MHD_Response* r = MHD_create_response_from_buffer (version.size (),
2544 : 0 : (void *) version.c_str (),
2545 : : MHD_RESPMEM_PERSISTENT);
2546 [ # # ]: 0 : if (r != NULL)
2547 : : {
2548 : 0 : *size = version.size ();
2549 : 0 : add_mhd_response_header (r, "Content-Type", "text/plain");
2550 : : }
2551 : 0 : return r;
2552 : : }
2553 : :
2554 : :
2555 : : ////////////////////////////////////////////////////////////////////////
2556 : :
2557 : :
2558 : : /* libmicrohttpd callback */
2559 : : static MHD_RESULT
2560 : 1592 : handler_cb (void * /*cls*/,
2561 : : struct MHD_Connection *connection,
2562 : : const char *url,
2563 : : const char *method,
2564 : : const char * /*version*/,
2565 : : const char * /*upload_data*/,
2566 : : size_t * /*upload_data_size*/,
2567 : : void ** ptr)
2568 : : {
2569 : 1592 : struct MHD_Response *r = NULL;
2570 : 3184 : string url_copy = url;
2571 : :
2572 : : /* libmicrohttpd always makes (at least) two callbacks: once just
2573 : : past the headers, and one after the request body is finished
2574 : : being received. If we process things early (first callback) and
2575 : : queue a response, libmicrohttpd would suppress http keep-alive
2576 : : (via connection->read_closed = true). */
2577 : 1592 : static int aptr; /* just some random object to use as a flag */
2578 [ + + ]: 1592 : if (&aptr != *ptr)
2579 : : {
2580 : : /* do never respond on first call */
2581 : 796 : *ptr = &aptr;
2582 : 796 : return MHD_YES;
2583 : : }
2584 : 796 : *ptr = NULL; /* reset when done */
2585 : :
2586 [ + - ]: 796 : const char *maxsize_string = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "X-DEBUGINFOD-MAXSIZE");
2587 : 796 : long maxsize = 0;
2588 [ + + + - ]: 796 : if (maxsize_string != NULL && maxsize_string[0] != '\0')
2589 : 1 : maxsize = atol(maxsize_string);
2590 : : else
2591 : : maxsize = 0;
2592 : :
2593 : : #if MHD_VERSION >= 0x00097002
2594 : 796 : enum MHD_Result rc;
2595 : : #else
2596 : : int rc = MHD_NO; // mhd
2597 : : #endif
2598 : 796 : int http_code = 500;
2599 : 796 : off_t http_size = -1;
2600 : 796 : struct timespec ts_start, ts_end;
2601 : 796 : clock_gettime (CLOCK_MONOTONIC, &ts_start);
2602 : 796 : double afteryou = 0.0;
2603 [ + - - + : 3184 : string artifacttype, suffix;
+ + ]
2604 : :
2605 : 796 : try
2606 : : {
2607 [ + - - + : 1592 : if (string(method) != "GET")
- + ]
2608 [ # # # # ]: 0 : throw reportable_exception(400, "we support GET only");
2609 : :
2610 : : /* Start decoding the URL. */
2611 : 796 : size_t slash1 = url_copy.find('/', 1);
2612 [ + - ]: 1592 : string url1 = url_copy.substr(0, slash1); // ok even if slash1 not found
2613 : :
2614 [ + + + - ]: 1248 : if (slash1 != string::npos && url1 == "/buildid")
2615 : : {
2616 : : // PR27863: block this thread awhile if another thread is already busy
2617 : : // fetching the exact same thing. This is better for Everyone.
2618 : : // The latecomer says "... after you!" and waits.
2619 [ + - + - : 1431 : add_metric ("thread_busy", "role", "http-buildid-after-you", 1);
+ - + - -
+ + - - -
- - - + ]
2620 : : #ifdef HAVE_PTHREAD_SETNAME_NP
2621 : 452 : (void) pthread_setname_np (pthread_self(), "mhd-buildid-after-you");
2622 : : #endif
2623 : 452 : struct timespec tsay_start, tsay_end;
2624 : 452 : clock_gettime (CLOCK_MONOTONIC, &tsay_start);
2625 [ + + + - ]: 452 : static unique_set<string> busy_urls;
2626 [ + - ]: 833 : unique_set_reserver<string> after_you(busy_urls, url_copy);
2627 : 452 : clock_gettime (CLOCK_MONOTONIC, &tsay_end);
2628 : 452 : afteryou = (tsay_end.tv_sec - tsay_start.tv_sec) + (tsay_end.tv_nsec - tsay_start.tv_nsec)/1.e9;
2629 [ + - + - : 1427 : add_metric ("thread_busy", "role", "http-buildid-after-you", -1);
+ - + - -
+ + - + -
- - - - ]
2630 : :
2631 [ + - + - : 1046 : tmp_inc_metric m ("thread_busy", "role", "http-buildid");
+ - + - -
+ - + - -
- - ]
2632 : : #ifdef HAVE_PTHREAD_SETNAME_NP
2633 : 452 : (void) pthread_setname_np (pthread_self(), "mhd-buildid");
2634 : : #endif
2635 : 452 : size_t slash2 = url_copy.find('/', slash1+1);
2636 [ - + ]: 452 : if (slash2 == string::npos)
2637 [ # # # # ]: 0 : throw reportable_exception("/buildid/ webapi error, need buildid");
2638 : :
2639 [ + - ]: 904 : string buildid = url_copy.substr(slash1+1, slash2-slash1-1);
2640 : :
2641 : 452 : size_t slash3 = url_copy.find('/', slash2+1);
2642 : :
2643 [ + + ]: 452 : if (slash3 == string::npos)
2644 : : {
2645 [ + - - + ]: 420 : artifacttype = url_copy.substr(slash2+1);
2646 [ + - ]: 420 : suffix = "";
2647 : : }
2648 : : else
2649 : : {
2650 [ + - - + ]: 32 : artifacttype = url_copy.substr(slash2+1, slash3-slash2-1);
2651 [ + - - + ]: 32 : suffix = url_copy.substr(slash3); // include the slash in the suffix
2652 : : }
2653 : :
2654 : : // get the resulting fd so we can report its size
2655 : 452 : int fd;
2656 [ + + ]: 452 : r = handle_buildid(connection, buildid, artifacttype, suffix, &fd);
2657 [ + - ]: 381 : if (r)
2658 : : {
2659 : 381 : struct stat fs;
2660 [ + - ]: 381 : if (fstat(fd, &fs) == 0)
2661 : 381 : http_size = fs.st_size;
2662 : : // libmicrohttpd will close (fd);
2663 : : }
2664 : : }
2665 [ + + ]: 344 : else if (url1 == "/metrics")
2666 : : {
2667 [ + - + - : 682 : tmp_inc_metric m ("thread_busy", "role", "http-metrics");
+ - + - -
+ - + + -
- - - - ]
2668 [ + - ]: 341 : artifacttype = "metrics";
2669 [ + - + - : 682 : inc_metric("http_requests_total", "type", artifacttype);
+ - - + +
- - - ]
2670 [ + - ]: 341 : r = handle_metrics(& http_size);
2671 : : }
2672 [ - + ]: 3 : else if (url1 == "/")
2673 : : {
2674 [ # # ]: 0 : artifacttype = "/";
2675 [ - - - - : 75 : inc_metric("http_requests_total", "type", artifacttype);
- - - - -
- - - -
+ ]
2676 [ # # ]: 0 : r = handle_root(& http_size);
2677 : : }
2678 : : else
2679 [ + - + - : 9 : throw reportable_exception("webapi error, unrecognized '" + url1 + "'");
+ - - + -
- ]
2680 : :
2681 [ - + ]: 722 : if (r == 0)
2682 [ # # # # ]: 0 : throw reportable_exception("internal error, missing response");
2683 : :
2684 [ + + + - ]: 722 : if (maxsize > 0 && http_size > maxsize)
2685 : : {
2686 [ + - ]: 1 : MHD_destroy_response(r);
2687 [ + - + - : 3 : throw reportable_exception(406, "File too large, max size=" + std::to_string(maxsize));
+ - - + -
- ]
2688 : : }
2689 : :
2690 [ + - ]: 721 : rc = MHD_queue_response (connection, MHD_HTTP_OK, r);
2691 : 721 : http_code = MHD_HTTP_OK;
2692 [ + - ]: 721 : MHD_destroy_response (r);
2693 : : }
2694 [ - + ]: 75 : catch (const reportable_exception& e)
2695 : : {
2696 [ + - + - : 150 : inc_metric("http_responses_total","result","error");
+ - + - -
+ - + + -
- - - - ]
2697 [ + - ]: 75 : e.report(clog);
2698 : 75 : http_code = e.code;
2699 [ + - ]: 75 : http_size = e.message.size();
2700 [ + - ]: 75 : rc = e.mhd_send_response (connection);
2701 : : }
2702 : :
2703 : 796 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
2704 : 796 : double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
2705 : : // afteryou: delay waiting for other client's identical query to complete
2706 : : // deltas: total latency, including afteryou waiting
2707 [ + - + - : 2388 : obatched(clog) << conninfo(connection)
+ - - - ]
2708 : : << ' ' << method << ' ' << url
2709 [ + - + - : 1592 : << ' ' << http_code << ' ' << http_size
+ - + - +
- + - +
- ]
2710 [ + - + - : 1592 : << ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
+ - + - +
- + - ]
2711 [ + - ]: 796 : << endl;
2712 : :
2713 : : // related prometheus metrics
2714 [ + - + + ]: 1592 : string http_code_str = to_string(http_code);
2715 [ + - + - ]: 1592 : add_metric("http_responses_transfer_bytes_sum",
2716 [ + - + - : 1592 : "code", http_code_str, "type", artifacttype, http_size);
+ - - + -
+ + - - -
- - - - ]
2717 [ + - + - ]: 1592 : inc_metric("http_responses_transfer_bytes_count",
2718 [ + - + - : 1592 : "code", http_code_str, "type", artifacttype);
+ - - + -
+ + - - -
- - ]
2719 : :
2720 [ + - + - ]: 1592 : add_metric("http_responses_duration_milliseconds_sum",
2721 [ + - + - : 1592 : "code", http_code_str, "type", artifacttype, deltas*1000); // prometheus prefers _seconds and floating point
+ - - + -
+ + - - -
- - ]
2722 [ + - + - ]: 1592 : inc_metric("http_responses_duration_milliseconds_count",
2723 [ + - + - : 1592 : "code", http_code_str, "type", artifacttype);
+ - - + -
+ + - - -
- - ]
2724 : :
2725 [ + - + - ]: 1592 : add_metric("http_responses_after_you_milliseconds_sum",
2726 [ + - + - : 1592 : "code", http_code_str, "type", artifacttype, afteryou*1000);
+ - - + -
+ + - - -
- - ]
2727 [ + - + - ]: 1592 : inc_metric("http_responses_after_you_milliseconds_count",
2728 [ + - + - : 1592 : "code", http_code_str, "type", artifacttype);
+ - - + -
+ - + - -
- - - - ]
2729 : :
2730 [ - + ]: 796 : return rc;
2731 : : }
2732 : :
2733 : :
2734 : : ////////////////////////////////////////////////////////////////////////
2735 : : // borrowed originally from src/nm.c get_local_names()
2736 : :
2737 : : static void
2738 : 105 : dwarf_extract_source_paths (Elf *elf, set<string>& debug_sourcefiles)
2739 : : noexcept // no exceptions - so we can simplify the altdbg resource release at end
2740 : : {
2741 : 105 : Dwarf* dbg = dwarf_begin_elf (elf, DWARF_C_READ, NULL);
2742 [ - + ]: 105 : if (dbg == NULL)
2743 : 0 : return;
2744 : :
2745 : 105 : Dwarf* altdbg = NULL;
2746 : 105 : int altdbg_fd = -1;
2747 : :
2748 : : // DWZ handling: if we have an unsatisfied debug-alt-link, add an
2749 : : // empty string into the outgoing sourcefiles set, so the caller
2750 : : // should know that our data is incomplete.
2751 : 105 : const char *alt_name_p;
2752 : 105 : const void *alt_build_id; // elfutils-owned memory
2753 : 105 : ssize_t sz = dwelf_dwarf_gnu_debugaltlink (dbg, &alt_name_p, &alt_build_id);
2754 [ + + ]: 105 : if (sz > 0) // got one!
2755 : : {
2756 : 40 : string buildid;
2757 : 20 : unsigned char* build_id_bytes = (unsigned char*) alt_build_id;
2758 [ + + ]: 420 : for (ssize_t idx=0; idx<sz; idx++)
2759 : : {
2760 : 400 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
2761 : 400 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
2762 : : }
2763 : :
2764 [ + - ]: 20 : if (verbose > 3)
2765 : 20 : obatched(clog) << "Need altdebug buildid=" << buildid << endl;
2766 : :
2767 : : // but is it unsatisfied the normal elfutils ways?
2768 : 20 : Dwarf* alt = dwarf_getalt (dbg);
2769 [ + - ]: 20 : if (alt == NULL)
2770 : : {
2771 : : // Yup, unsatisfied the normal way. Maybe we can satisfy it
2772 : : // from our own debuginfod database.
2773 : 20 : int alt_fd;
2774 : 20 : struct MHD_Response *r = 0;
2775 : 20 : try
2776 : : {
2777 [ + - ]: 20 : string artifacttype = "debuginfo";
2778 [ + - + - : 20 : r = handle_buildid (0, buildid, artifacttype, "", &alt_fd);
- + - + -
- ]
2779 : : }
2780 [ - - ]: 0 : catch (const reportable_exception& e)
2781 : : {
2782 : : // swallow exceptions
2783 : : }
2784 : :
2785 : : // NB: this is not actually recursive! This invokes the web-query
2786 : : // path, which cannot get back into the scan code paths.
2787 [ + - ]: 20 : if (r)
2788 : : {
2789 : : // Found it!
2790 : 20 : altdbg_fd = dup(alt_fd); // ok if this fails, downstream failures ok
2791 : 20 : alt = altdbg = dwarf_begin (altdbg_fd, DWARF_C_READ);
2792 : : // NB: must close this dwarf and this fd at the bottom of the function!
2793 : 20 : MHD_destroy_response (r); // will close alt_fd
2794 [ + - ]: 20 : if (alt)
2795 : 20 : dwarf_setalt (dbg, alt);
2796 : : }
2797 : : }
2798 : : else
2799 : : {
2800 : : // NB: dwarf_setalt(alt) inappropriate - already done!
2801 : : // NB: altdbg will stay 0 so nothing tries to redundantly dealloc.
2802 : : }
2803 : :
2804 [ + - ]: 20 : if (alt)
2805 : : {
2806 [ + - ]: 20 : if (verbose > 3)
2807 : 20 : obatched(clog) << "Resolved altdebug buildid=" << buildid << endl;
2808 : : }
2809 : : else // (alt == NULL) - signal possible presence of poor debuginfo
2810 : : {
2811 [ # # # # ]: 0 : debug_sourcefiles.insert("");
2812 [ # # ]: 0 : if (verbose > 3)
2813 : 0 : obatched(clog) << "Unresolved altdebug buildid=" << buildid << endl;
2814 : : }
2815 : : }
2816 : :
2817 : 105 : Dwarf_Off offset = 0;
2818 : 1155 : Dwarf_Off old_offset;
2819 : 1155 : size_t hsize;
2820 : :
2821 [ + + ]: 1155 : while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
2822 : : {
2823 : 1050 : Dwarf_Die cudie_mem;
2824 : 1050 : Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
2825 : :
2826 [ - + ]: 1050 : if (cudie == NULL)
2827 : 10 : continue;
2828 [ + + ]: 1050 : if (dwarf_tag (cudie) != DW_TAG_compile_unit)
2829 : 10 : continue;
2830 : :
2831 [ - + ]: 1040 : const char *cuname = dwarf_diename(cudie) ?: "unknown";
2832 : :
2833 : 1040 : Dwarf_Files *files;
2834 : 1040 : size_t nfiles;
2835 [ - + ]: 1040 : if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
2836 : 0 : continue;
2837 : :
2838 : : // extract DW_AT_comp_dir to resolve relative file names
2839 : 1040 : const char *comp_dir = "";
2840 : 1040 : const char *const *dirs;
2841 : 1040 : size_t ndirs;
2842 [ + - ]: 1040 : if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 &&
2843 [ + - ]: 1040 : dirs[0] != NULL)
2844 : 1040 : comp_dir = dirs[0];
2845 [ - + ]: 1040 : if (comp_dir == NULL)
2846 : 0 : comp_dir = "";
2847 : :
2848 [ + + ]: 1040 : if (verbose > 3)
2849 : 164 : obatched(clog) << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir
2850 : 82 : << " #files=" << nfiles << " #dirs=" << ndirs << endl;
2851 : :
2852 [ + - - - ]: 1040 : if (comp_dir[0] == '\0' && cuname[0] != '/')
2853 : : {
2854 : : // This is a common symptom for dwz-compressed debug files,
2855 : : // where the altdebug file cannot be resolved.
2856 [ # # ]: 0 : if (verbose > 3)
2857 : 0 : obatched(clog) << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
2858 : 0 : continue;
2859 : : }
2860 : :
2861 [ + + ]: 15643 : for (size_t f = 1; f < nfiles; f++)
2862 : : {
2863 : 14603 : const char *hat = dwarf_filesrc (files, f, NULL, NULL);
2864 [ - + ]: 14603 : if (hat == NULL)
2865 : 0 : continue;
2866 : :
2867 [ + + - + ]: 27820 : if (string(hat) == "<built-in>") // gcc intrinsics, don't bother record
2868 : 0 : continue;
2869 : :
2870 [ + + ]: 29206 : string waldo;
2871 [ + + ]: 14603 : if (hat[0] == '/') // absolute
2872 [ - + ]: 9046 : waldo = (string (hat));
2873 [ + - ]: 5557 : else if (comp_dir[0] != '\0') // comp_dir relative
2874 [ - + - + : 9728 : waldo = (string (comp_dir) + string("/") + string (hat));
- + - + +
+ ]
2875 : : else
2876 : : {
2877 [ # # ]: 0 : if (verbose > 3)
2878 : 0 : obatched(clog) << "skipping hat=" << hat << " due to empty comp_dir" << endl;
2879 [ # # ]: 0 : continue;
2880 : : }
2881 : :
2882 : : // NB: this is the 'waldo' that a dbginfo client will have
2883 : : // to supply for us to give them the file The comp_dir
2884 : : // prefixing is a definite complication. Otherwise we'd
2885 : : // have to return a setof comp_dirs (one per CU!) with
2886 : : // corresponding filesrc[] names, instead of one absolute
2887 : : // resoved set. Maybe we'll have to do that anyway. XXX
2888 : :
2889 [ + + ]: 14603 : if (verbose > 4)
2890 [ - + ]: 50 : obatched(clog) << waldo
2891 [ - + ]: 25 : << (debug_sourcefiles.find(waldo)==debug_sourcefiles.end() ? " new" : " dup") << endl;
2892 : :
2893 [ + - ]: 14603 : debug_sourcefiles.insert (waldo);
2894 : : }
2895 : : }
2896 : :
2897 : 105 : dwarf_end(dbg);
2898 [ + + ]: 105 : if (altdbg)
2899 : 20 : dwarf_end(altdbg);
2900 [ + + ]: 105 : if (altdbg_fd >= 0)
2901 : 20 : close(altdbg_fd);
2902 : : }
2903 : :
2904 : :
2905 : :
2906 : : static void
2907 : 517 : elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, set<string>& debug_sourcefiles)
2908 : : {
2909 : 517 : Elf *elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL);
2910 [ + - ]: 517 : if (elf == NULL)
2911 : : return;
2912 : :
2913 : 517 : try // catch our types of errors and clean up the Elf* object
2914 : : {
2915 [ + - + + ]: 517 : if (elf_kind (elf) != ELF_K_ELF)
2916 : : {
2917 [ + - ]: 310 : elf_end (elf);
2918 : 310 : return;
2919 : : }
2920 : :
2921 : 207 : GElf_Ehdr ehdr_storage;
2922 [ + - ]: 207 : GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
2923 [ - + ]: 207 : if (ehdr == NULL)
2924 : : {
2925 [ # # ]: 0 : elf_end (elf);
2926 : : return;
2927 : : }
2928 : 207 : auto elf_type = ehdr->e_type;
2929 : :
2930 : 207 : const void *build_id; // elfutils-owned memory
2931 [ + - ]: 207 : ssize_t sz = dwelf_elf_gnu_build_id (elf, & build_id);
2932 [ - + ]: 207 : if (sz <= 0)
2933 : : {
2934 : : // It's not a diagnostic-worthy error for an elf file to lack build-id.
2935 : : // It might just be very old.
2936 [ # # ]: 0 : elf_end (elf);
2937 : : return;
2938 : : }
2939 : :
2940 : : // build_id is a raw byte array; convert to hexadecimal *lowercase*
2941 : 207 : unsigned char* build_id_bytes = (unsigned char*) build_id;
2942 [ + + ]: 4343 : for (ssize_t idx=0; idx<sz; idx++)
2943 : : {
2944 [ + - ]: 4136 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
2945 [ + - ]: 8272 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
2946 : : }
2947 : :
2948 : : // now decide whether it's an executable - namely, any allocatable section has
2949 : : // PROGBITS;
2950 [ + + ]: 207 : if (elf_type == ET_EXEC || elf_type == ET_DYN)
2951 : : {
2952 : 182 : size_t shnum;
2953 [ + - ]: 182 : int rc = elf_getshdrnum (elf, &shnum);
2954 [ - + ]: 182 : if (rc < 0)
2955 [ # # # # ]: 0 : throw elfutils_exception(rc, "getshdrnum");
2956 : :
2957 : 182 : executable_p = false;
2958 [ + + ]: 3469 : for (size_t sc = 0; sc < shnum; sc++)
2959 : : {
2960 [ + - ]: 3381 : Elf_Scn *scn = elf_getscn (elf, sc);
2961 [ - + ]: 3381 : if (scn == NULL)
2962 : 0 : continue;
2963 : :
2964 : 3381 : GElf_Shdr shdr_mem;
2965 [ + - ]: 3381 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
2966 [ - + ]: 3381 : if (shdr == NULL)
2967 : 0 : continue;
2968 : :
2969 : : // allocated (loadable / vm-addr-assigned) section with available content?
2970 [ + + + + ]: 3381 : if ((shdr->sh_type == SHT_PROGBITS) && (shdr->sh_flags & SHF_ALLOC))
2971 : : {
2972 [ + + ]: 94 : if (verbose > 4)
2973 [ + - + - : 16 : obatched(clog) << "executable due to SHF_ALLOC SHT_PROGBITS sc=" << sc << endl;
+ - ]
2974 : 94 : executable_p = true;
2975 : 94 : break; // no need to keep looking for others
2976 : : }
2977 : : } // iterate over sections
2978 : : } // executable_p classification
2979 : :
2980 : : // now decide whether it's a debuginfo - namely, if it has any .debug* or .zdebug* sections
2981 : : // logic mostly stolen from fweimer@redhat.com's elfclassify drafts
2982 : 207 : size_t shstrndx;
2983 [ + - ]: 207 : int rc = elf_getshdrstrndx (elf, &shstrndx);
2984 [ - + ]: 207 : if (rc < 0)
2985 [ # # # # ]: 0 : throw elfutils_exception(rc, "getshdrstrndx");
2986 : :
2987 : : Elf_Scn *scn = NULL;
2988 : : bool symtab_p = false;
2989 : : bool bits_alloc_p = false;
2990 : 10603 : while (true)
2991 : : {
2992 [ + - ]: 5405 : scn = elf_nextscn (elf, scn);
2993 [ + + ]: 5403 : if (scn == NULL)
2994 : : break;
2995 : 5301 : GElf_Shdr shdr_storage;
2996 [ + - ]: 5301 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
2997 [ + - ]: 5301 : if (shdr == NULL)
2998 : : break;
2999 [ + - ]: 5301 : const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
3000 [ + - ]: 5303 : if (section_name == NULL)
3001 : : break;
3002 [ + + ]: 5303 : if (startswith (section_name, ".debug_line") ||
3003 [ - + ]: 5198 : startswith (section_name, ".zdebug_line"))
3004 : : {
3005 : 105 : debuginfo_p = true;
3006 [ + - ]: 105 : if (scan_source_info)
3007 : 105 : dwarf_extract_source_paths (elf, debug_sourcefiles);
3008 : : break; // expecting only one .*debug_line, so no need to look for others
3009 : : }
3010 [ + + ]: 5198 : else if (startswith (section_name, ".debug_") ||
3011 [ + # ]: 4862 : startswith (section_name, ".zdebug_"))
3012 : : {
3013 : 334 : debuginfo_p = true;
3014 : : // NB: don't break; need to parse .debug_line for sources
3015 : : }
3016 [ + + ]: 4864 : else if (shdr->sh_type == SHT_SYMTAB)
3017 : : {
3018 : : symtab_p = true;
3019 : : }
3020 : 4852 : else if (shdr->sh_type != SHT_NOBITS
3021 [ + + ]: 4852 : && shdr->sh_type != SHT_NOTE
3022 [ + + ]: 2349 : && (shdr->sh_flags & SHF_ALLOC) != 0)
3023 : : {
3024 : 1996 : bits_alloc_p = true;
3025 : : }
3026 : 5198 : }
3027 : :
3028 : : // For more expansive elf/split-debuginfo classification, we
3029 : : // want to identify as debuginfo "strip -s"-produced files
3030 : : // without .debug_info* (like libicudata), but we don't want to
3031 : : // identify "strip -g" executables (with .symtab left there).
3032 [ - + ]: 207 : if (symtab_p && !bits_alloc_p)
3033 : 0 : debuginfo_p = true;
3034 : : }
3035 [ # # ]: 0 : catch (const reportable_exception& e)
3036 : : {
3037 [ # # ]: 0 : e.report(clog);
3038 : : }
3039 : 207 : elf_end (elf);
3040 : : }
3041 : :
3042 : :
3043 : : static void
3044 : 364 : scan_source_file (const string& rps, const stat_t& st,
3045 : : sqlite_ps& ps_upsert_buildids,
3046 : : sqlite_ps& ps_upsert_files,
3047 : : sqlite_ps& ps_upsert_de,
3048 : : sqlite_ps& ps_upsert_s,
3049 : : sqlite_ps& ps_query,
3050 : : sqlite_ps& ps_scan_done,
3051 : : unsigned& fts_cached,
3052 : : unsigned& fts_executable,
3053 : : unsigned& fts_debuginfo,
3054 : : unsigned& fts_sourcefiles)
3055 : : {
3056 : : /* See if we know of it already. */
3057 : 364 : int rc = ps_query
3058 : 364 : .reset()
3059 : 364 : .bind(1, rps)
3060 : 364 : .bind(2, st.st_mtime)
3061 : 364 : .step();
3062 : 364 : ps_query.reset();
3063 [ + + ]: 364 : if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
3064 : : // no need to recheck a file/version we already know
3065 : : // specifically, no need to elf-begin a file we already determined is non-elf
3066 : : // (so is stored with buildid=NULL)
3067 : : {
3068 : 184 : fts_cached++;
3069 : 184 : return;
3070 : : }
3071 : :
3072 : 180 : bool executable_p = false, debuginfo_p = false; // E and/or D
3073 [ + - ]: 360 : string buildid;
3074 [ + - + + ]: 360 : set<string> sourcefiles;
3075 : :
3076 [ + - ]: 180 : int fd = open (rps.c_str(), O_RDONLY);
3077 : 180 : try
3078 : : {
3079 [ + - ]: 180 : if (fd >= 0)
3080 [ + - ]: 180 : elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
3081 : : else
3082 [ # # # # : 0 : throw libc_exception(errno, string("open ") + rps);
# # # # #
# ]
3083 [ + - ]: 180 : add_metric ("scanned_bytes_total","source","file",
3084 [ + - + - : 360 : st.st_size);
+ - - + -
+ + - - -
- - - - ]
3085 [ + - + - : 360 : inc_metric ("scanned_files_total","source","file");
+ - + - -
+ - + - -
- - ]
3086 : : }
3087 : : // NB: we catch exceptions here too, so that we can
3088 : : // cache the corrupt-elf case (!executable_p &&
3089 : : // !debuginfo_p) just below, just as if we had an
3090 : : // EPERM error from open(2).
3091 [ - - ]: 0 : catch (const reportable_exception& e)
3092 : : {
3093 [ - - ]: 0 : e.report(clog);
3094 : : }
3095 : :
3096 [ + - ]: 180 : if (fd >= 0)
3097 [ + - ]: 180 : close (fd);
3098 : :
3099 : : // register this file name in the interning table
3100 : 180 : ps_upsert_files
3101 [ + - ]: 180 : .reset()
3102 [ + - ]: 180 : .bind(1, rps)
3103 [ + - ]: 180 : .step_ok_done();
3104 : :
3105 [ + + ]: 180 : if (buildid == "")
3106 : : {
3107 : : // no point storing an elf file without buildid
3108 : 149 : executable_p = false;
3109 : 149 : debuginfo_p = false;
3110 : : }
3111 : : else
3112 : : {
3113 : : // register this build-id in the interning table
3114 : 31 : ps_upsert_buildids
3115 [ + - ]: 31 : .reset()
3116 [ + - ]: 31 : .bind(1, buildid)
3117 [ + - ]: 31 : .step_ok_done();
3118 : : }
3119 : :
3120 [ + + ]: 180 : if (executable_p)
3121 : 19 : fts_executable ++;
3122 [ + + ]: 180 : if (debuginfo_p)
3123 : 19 : fts_debuginfo ++;
3124 [ + + + + ]: 180 : if (executable_p || debuginfo_p)
3125 : : {
3126 : 31 : ps_upsert_de
3127 [ + - ]: 31 : .reset()
3128 [ + - ]: 31 : .bind(1, buildid)
3129 [ + + + - ]: 43 : .bind(2, debuginfo_p ? 1 : 0)
3130 [ + + + - ]: 43 : .bind(3, executable_p ? 1 : 0)
3131 [ + - ]: 31 : .bind(4, rps)
3132 [ + - ]: 31 : .bind(5, st.st_mtime)
3133 [ + - ]: 31 : .step_ok_done();
3134 : : }
3135 [ + + ]: 180 : if (executable_p)
3136 [ + - + - : 38 : inc_metric("found_executable_total","source","files");
+ - + - -
+ - + - -
- - ]
3137 [ + + ]: 180 : if (debuginfo_p)
3138 [ + - + - : 38 : inc_metric("found_debuginfo_total","source","files");
+ - + - -
+ - + - -
- - ]
3139 : :
3140 [ + + + - ]: 199 : if (sourcefiles.size() && buildid != "")
3141 : : {
3142 : 19 : fts_sourcefiles += sourcefiles.size();
3143 : :
3144 [ + + ]: 1496 : for (auto&& dwarfsrc : sourcefiles)
3145 : : {
3146 [ - + ]: 1477 : char *srp = realpath(dwarfsrc.c_str(), NULL);
3147 [ + + ]: 1477 : if (srp == NULL) // also if DWZ unresolved dwarfsrc=""
3148 : 18 : continue; // unresolvable files are not a serious problem
3149 : : // throw libc_exception(errno, "fts/file realpath " + srcpath);
3150 [ + - ]: 2918 : string srps = string(srp);
3151 : 1459 : free (srp);
3152 : :
3153 : 1459 : struct stat sfs;
3154 : 1459 : rc = stat(srps.c_str(), &sfs);
3155 [ - + ]: 1459 : if (rc != 0)
3156 [ - - ]: 18 : continue;
3157 : :
3158 [ + - ]: 1459 : if (verbose > 2)
3159 [ + - + - : 4377 : obatched(clog) << "recorded buildid=" << buildid << " file=" << srps
- - ]
3160 [ + - + - : 1459 : << " mtime=" << sfs.st_mtime
+ - + - ]
3161 [ + - + - : 1459 : << " as source " << dwarfsrc << endl;
+ - ]
3162 : :
3163 : 1459 : ps_upsert_files
3164 [ + - ]: 1459 : .reset()
3165 [ + - ]: 1459 : .bind(1, srps)
3166 [ + - ]: 1459 : .step_ok_done();
3167 : :
3168 : : // PR25548: store canonicalized dwarfsrc path
3169 [ + - + - ]: 2918 : string dwarfsrc_canon = canon_pathname (dwarfsrc);
3170 [ + + ]: 1459 : if (dwarfsrc_canon != dwarfsrc)
3171 : : {
3172 [ + + ]: 254 : if (verbose > 3)
3173 [ + - + - : 10 : obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+ - + - +
- ]
3174 : : }
3175 : :
3176 : 1459 : ps_upsert_files
3177 [ + - ]: 1459 : .reset()
3178 [ + - ]: 1459 : .bind(1, dwarfsrc_canon)
3179 [ + - ]: 1459 : .step_ok_done();
3180 : :
3181 : 1459 : ps_upsert_s
3182 [ + - ]: 1459 : .reset()
3183 [ + - ]: 1459 : .bind(1, buildid)
3184 [ + - ]: 1459 : .bind(2, dwarfsrc_canon)
3185 [ + - ]: 1459 : .bind(3, srps)
3186 [ + - ]: 1459 : .bind(4, sfs.st_mtime)
3187 [ + - ]: 1459 : .step_ok_done();
3188 : :
3189 [ + - + - : 2918 : inc_metric("found_sourcerefs_total","source","files");
+ - + - -
+ - + + -
- - - - -
- ]
3190 : : }
3191 : : }
3192 : :
3193 : 180 : ps_scan_done
3194 [ + - ]: 180 : .reset()
3195 [ + - ]: 180 : .bind(1, rps)
3196 [ + - ]: 180 : .bind(2, st.st_mtime)
3197 [ + - ]: 180 : .bind(3, st.st_size)
3198 [ + - ]: 180 : .step_ok_done();
3199 : :
3200 [ + - ]: 180 : if (verbose > 2)
3201 [ + - + - ]: 540 : obatched(clog) << "recorded buildid=" << buildid << " file=" << rps
3202 [ + - + - : 180 : << " mtime=" << st.st_mtime << " atype="
+ - + - ]
3203 : : << (executable_p ? "E" : "")
3204 [ + - + + : 502 : << (debuginfo_p ? "D" : "") << endl;
+ - + + +
- + - ]
3205 : : }
3206 : :
3207 : :
3208 : :
3209 : :
3210 : :
3211 : : // Analyze given archive file of given age; record buildids / exec/debuginfo-ness of its
3212 : : // constituent files with given upsert statements.
3213 : : static void
3214 : 181 : archive_classify (const string& rps, string& archive_extension,
3215 : : sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_files,
3216 : : sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef,
3217 : : time_t mtime,
3218 : : unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef,
3219 : : bool& fts_sref_complete_p)
3220 : : {
3221 : 181 : string archive_decoder = "/dev/null";
3222 [ + + ]: 448 : for (auto&& arch : scan_archives)
3223 [ + + ]: 267 : if (string_endswith(rps, arch.first))
3224 : : {
3225 [ + - ]: 181 : archive_extension = arch.first;
3226 [ + - ]: 448 : archive_decoder = arch.second;
3227 : : }
3228 : :
3229 : 181 : FILE* fp;
3230 : 181 : defer_dtor<FILE*,int>::dtor_fn dfn;
3231 [ + + ]: 181 : if (archive_decoder != "cat")
3232 : : {
3233 [ + - + - : 48 : string popen_cmd = archive_decoder + " " + shell_escape(rps);
+ - - + -
- - - ]
3234 [ + - ]: 16 : fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
3235 : 16 : dfn = pclose;
3236 [ - + ]: 16 : if (fp == NULL)
3237 [ # # # # : 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
# # # # #
# ]
3238 : : }
3239 : : else
3240 : : {
3241 [ + - ]: 165 : fp = fopen (rps.c_str(), "r");
3242 : 165 : dfn = fclose;
3243 [ - + ]: 165 : if (fp == NULL)
3244 [ # # # # : 0 : throw libc_exception (errno, string("fopen ") + rps);
# # # # #
# ]
3245 : : }
3246 [ + + ]: 181 : defer_dtor<FILE*,int> fp_closer (fp, dfn);
3247 : :
3248 : 181 : struct archive *a;
3249 [ + - ]: 181 : a = archive_read_new();
3250 [ - + ]: 181 : if (a == NULL)
3251 [ # # # # ]: 0 : throw archive_exception("cannot create archive reader");
3252 : 181 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
3253 : :
3254 [ + - ]: 181 : int rc = archive_read_support_format_all(a);
3255 [ - + ]: 181 : if (rc != ARCHIVE_OK)
3256 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all formats");
3257 [ + - ]: 181 : rc = archive_read_support_filter_all(a);
3258 [ - + ]: 181 : if (rc != ARCHIVE_OK)
3259 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all filters");
3260 : :
3261 [ + - ]: 181 : rc = archive_read_open_FILE (a, fp);
3262 [ - + ]: 181 : if (rc != ARCHIVE_OK)
3263 : : {
3264 [ # # # # : 0 : obatched(clog) << "cannot open archive from pipe " << rps << endl;
# # ]
3265 [ # # # # ]: 0 : throw archive_exception(a, "cannot open archive from pipe");
3266 : : }
3267 : :
3268 [ + + ]: 181 : if (verbose > 3)
3269 [ + - + - : 346 : obatched(clog) << "libarchive scanning " << rps << endl;
+ - ]
3270 : :
3271 : : bool any_exceptions = false;
3272 : 1282 : while(1) // parse archive entries
3273 : : {
3274 [ + - ]: 1282 : if (interrupted)
3275 : : break;
3276 : :
3277 : 1282 : try
3278 : : {
3279 : 1282 : struct archive_entry *e;
3280 [ + - ]: 1282 : rc = archive_read_next_header (a, &e);
3281 [ + + ]: 1282 : if (rc != ARCHIVE_OK)
3282 : : break;
3283 : :
3284 [ + - + + ]: 1101 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
3285 : 764 : continue;
3286 : :
3287 [ + - ]: 674 : string fn = canonicalized_archive_entry_pathname (e);
3288 : :
3289 [ + + ]: 337 : if (verbose > 3)
3290 [ + - + - : 634 : obatched(clog) << "libarchive checking " << fn << endl;
+ - - - ]
3291 : :
3292 : : // extract this file to a temporary file
3293 : 337 : char* tmppath = NULL;
3294 : 337 : rc = asprintf (&tmppath, "%s/debuginfod.XXXXXX", tmpdir.c_str());
3295 [ - + ]: 337 : if (rc < 0)
3296 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
3297 : 0 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
3298 [ + - ]: 337 : int fd = mkstemp (tmppath);
3299 [ - + ]: 337 : if (fd < 0)
3300 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
3301 : 337 : unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
3302 [ + + ]: 337 : defer_dtor<int,int> minifd_closer (fd, close);
3303 : :
3304 [ + - ]: 337 : rc = archive_read_data_into_fd (a, fd);
3305 [ - + ]: 337 : if (rc != ARCHIVE_OK)
3306 [ # # # # ]: 0 : throw archive_exception(a, "cannot extract file");
3307 : :
3308 : : // finally ... time to run elf_classify on this bad boy and update the database
3309 : 337 : bool executable_p = false, debuginfo_p = false;
3310 [ + - ]: 674 : string buildid;
3311 [ + - + + ]: 674 : set<string> sourcefiles;
3312 [ + - ]: 337 : elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
3313 : : // NB: might throw
3314 : :
3315 [ + + ]: 337 : if (buildid != "") // intern buildid
3316 : : {
3317 : 176 : ps_upsert_buildids
3318 [ + - ]: 176 : .reset()
3319 [ + - ]: 176 : .bind(1, buildid)
3320 [ + - ]: 176 : .step_ok_done();
3321 : : }
3322 : :
3323 : 337 : ps_upsert_files // register this rpm constituent file name in interning table
3324 [ + - ]: 337 : .reset()
3325 [ + - ]: 337 : .bind(1, fn)
3326 [ + - ]: 337 : .step_ok_done();
3327 : :
3328 [ + + ]: 337 : if (sourcefiles.size() > 0) // sref records needed
3329 : : {
3330 : : // NB: we intern each source file once. Once raw, as it
3331 : : // appears in the DWARF file list coming back from
3332 : : // elf_classify() - because it'll end up in the
3333 : : // _norm.artifactsrc column. We don't also put another
3334 : : // version with a '.' at the front, even though that's
3335 : : // how rpm/cpio packs names, because we hide that from
3336 : : // the database for storage efficiency.
3337 : :
3338 [ + + ]: 308 : for (auto&& s : sourcefiles)
3339 : : {
3340 [ - + ]: 232 : if (s == "")
3341 : : {
3342 : 0 : fts_sref_complete_p = false;
3343 : 0 : continue;
3344 : : }
3345 : :
3346 : : // PR25548: store canonicalized source path
3347 : 232 : const string& dwarfsrc = s;
3348 [ + - ]: 464 : string dwarfsrc_canon = canon_pathname (dwarfsrc);
3349 [ + + ]: 232 : if (dwarfsrc_canon != dwarfsrc)
3350 : : {
3351 [ + - ]: 16 : if (verbose > 3)
3352 [ + - + - : 32 : obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+ - + - +
- - - ]
3353 : : }
3354 : :
3355 : 232 : ps_upsert_files
3356 [ + - ]: 232 : .reset()
3357 [ + - ]: 232 : .bind(1, dwarfsrc_canon)
3358 [ + - ]: 232 : .step_ok_done();
3359 : :
3360 : 232 : ps_upsert_sref
3361 [ + - ]: 232 : .reset()
3362 [ + - ]: 232 : .bind(1, buildid)
3363 [ + - ]: 232 : .bind(2, dwarfsrc_canon)
3364 [ + - ]: 232 : .step_ok_done();
3365 : :
3366 [ + - ]: 232 : fts_sref ++;
3367 : : }
3368 : : }
3369 : :
3370 [ + + ]: 337 : if (executable_p)
3371 : 75 : fts_executable ++;
3372 [ + + ]: 337 : if (debuginfo_p)
3373 : 101 : fts_debuginfo ++;
3374 : :
3375 [ + + + + ]: 337 : if (executable_p || debuginfo_p)
3376 : : {
3377 : 176 : ps_upsert_de
3378 [ + - ]: 176 : .reset()
3379 [ + - ]: 176 : .bind(1, buildid)
3380 [ + + + - ]: 251 : .bind(2, debuginfo_p ? 1 : 0)
3381 [ + + + - ]: 277 : .bind(3, executable_p ? 1 : 0)
3382 [ + - ]: 176 : .bind(4, rps)
3383 [ + - ]: 176 : .bind(5, mtime)
3384 [ + - ]: 176 : .bind(6, fn)
3385 [ + - ]: 176 : .step_ok_done();
3386 : : }
3387 : : else // potential source - sdef record
3388 : : {
3389 : 161 : fts_sdef ++;
3390 : 161 : ps_upsert_sdef
3391 [ + - ]: 161 : .reset()
3392 [ + - ]: 161 : .bind(1, rps)
3393 [ + - ]: 161 : .bind(2, mtime)
3394 [ + - ]: 161 : .bind(3, fn)
3395 [ + - ]: 161 : .step_ok_done();
3396 : : }
3397 : :
3398 [ + - + + : 337 : if ((verbose > 2) && (executable_p || debuginfo_p))
+ + ]
3399 [ + - + - ]: 528 : obatched(clog) << "recorded buildid=" << buildid << " rpm=" << rps << " file=" << fn
3400 [ + - + - : 176 : << " mtime=" << mtime << " atype="
+ - + - +
- + - ]
3401 : : << (executable_p ? "E" : "")
3402 : : << (debuginfo_p ? "D" : "")
3403 [ + - + + : 352 : << " sourcefiles=" << sourcefiles.size() << endl;
+ - + + +
- + - + -
+ - ]
3404 : :
3405 : : }
3406 [ - - ]: 0 : catch (const reportable_exception& e)
3407 : : {
3408 [ - - ]: 0 : e.report(clog);
3409 : 0 : any_exceptions = true;
3410 : : // NB: but we allow the libarchive iteration to continue, in
3411 : : // case we can still gather some useful information. That
3412 : : // would allow some webapi queries to work, until later when
3413 : : // this archive is rescanned. (Its vitals won't go into the
3414 : : // _file_mtime_scanned table until after a successful scan.)
3415 : : }
3416 : : }
3417 : :
3418 [ - + ]: 181 : if (any_exceptions)
3419 [ # # # # ]: 0 : throw reportable_exception("exceptions encountered during archive scan");
3420 : 181 : }
3421 : :
3422 : :
3423 : :
3424 : : // scan for archive files such as .rpm
3425 : : static void
3426 : 351 : scan_archive_file (const string& rps, const stat_t& st,
3427 : : sqlite_ps& ps_upsert_buildids,
3428 : : sqlite_ps& ps_upsert_files,
3429 : : sqlite_ps& ps_upsert_de,
3430 : : sqlite_ps& ps_upsert_sref,
3431 : : sqlite_ps& ps_upsert_sdef,
3432 : : sqlite_ps& ps_query,
3433 : : sqlite_ps& ps_scan_done,
3434 : : unsigned& fts_cached,
3435 : : unsigned& fts_executable,
3436 : : unsigned& fts_debuginfo,
3437 : : unsigned& fts_sref,
3438 : : unsigned& fts_sdef)
3439 : : {
3440 : : /* See if we know of it already. */
3441 : 351 : int rc = ps_query
3442 : 351 : .reset()
3443 : 351 : .bind(1, rps)
3444 : 351 : .bind(2, st.st_mtime)
3445 : 351 : .step();
3446 : 351 : ps_query.reset();
3447 [ + + ]: 351 : if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
3448 : : // no need to recheck a file/version we already know
3449 : : // specifically, no need to parse this archive again, since we already have
3450 : : // it as a D or E or S record,
3451 : : // (so is stored with buildid=NULL)
3452 : : {
3453 : 170 : fts_cached ++;
3454 : 170 : return;
3455 : : }
3456 : :
3457 : : // intern the archive file name
3458 : 181 : ps_upsert_files
3459 : 181 : .reset()
3460 : 181 : .bind(1, rps)
3461 : 181 : .step_ok_done();
3462 : :
3463 : : // extract the archive contents
3464 : 181 : unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0;
3465 : 181 : bool my_fts_sref_complete_p = true;
3466 : 181 : bool any_exceptions = false;
3467 : 181 : try
3468 : : {
3469 [ + - ]: 362 : string archive_extension;
3470 : 181 : archive_classify (rps, archive_extension,
3471 : : ps_upsert_buildids, ps_upsert_files,
3472 : : ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, // dalt
3473 [ + - ]: 181 : st.st_mtime,
3474 : : my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
3475 : : my_fts_sref_complete_p);
3476 [ + - ]: 181 : add_metric ("scanned_bytes_total","source",archive_extension + " archive",
3477 [ + - + - : 362 : st.st_size);
+ - - + +
+ - - -
- ]
3478 [ + - + - : 362 : inc_metric ("scanned_files_total","source",archive_extension + " archive");
+ - + - -
+ + + - -
- - ]
3479 [ + - + - ]: 362 : add_metric("found_debuginfo_total","source",archive_extension + " archive",
3480 [ + - + - : 362 : my_fts_debuginfo);
- + + + -
- - - ]
3481 [ + - + - ]: 362 : add_metric("found_executable_total","source",archive_extension + " archive",
3482 [ + - + - : 362 : my_fts_executable);
- + + + -
- - - ]
3483 [ + - + - : 543 : add_metric("found_sourcerefs_total","source",archive_extension + " archive",
- + - - ]
3484 [ + - + - : 362 : my_fts_sref);
- + + + -
- - - ]
3485 : : }
3486 [ - - ]: 0 : catch (const reportable_exception& e)
3487 : : {
3488 [ - - ]: 0 : e.report(clog);
3489 : 0 : any_exceptions = true;
3490 : : }
3491 : :
3492 [ + - ]: 181 : if (verbose > 2)
3493 [ + - ]: 543 : obatched(clog) << "scanned archive=" << rps
3494 [ + - + - ]: 181 : << " mtime=" << st.st_mtime
3495 [ + - ]: 181 : << " executables=" << my_fts_executable
3496 [ + - + - ]: 181 : << " debuginfos=" << my_fts_debuginfo
3497 [ + - + - ]: 181 : << " srefs=" << my_fts_sref
3498 [ + - + - ]: 181 : << " sdefs=" << my_fts_sdef
3499 [ + - + - : 181 : << " exceptions=" << any_exceptions
+ - + - ]
3500 : 181 : << endl;
3501 : :
3502 : 181 : fts_executable += my_fts_executable;
3503 : 181 : fts_debuginfo += my_fts_debuginfo;
3504 : 181 : fts_sref += my_fts_sref;
3505 : 181 : fts_sdef += my_fts_sdef;
3506 : :
3507 [ - + ]: 181 : if (any_exceptions)
3508 [ # # # # ]: 0 : throw reportable_exception("exceptions encountered during archive scan");
3509 : :
3510 [ + - ]: 181 : if (my_fts_sref_complete_p) // leave incomplete?
3511 : 181 : ps_scan_done
3512 : 181 : .reset()
3513 : 181 : .bind(1, rps)
3514 : 181 : .bind(2, st.st_mtime)
3515 : 181 : .bind(3, st.st_size)
3516 : 181 : .step_ok_done();
3517 : : }
3518 : :
3519 : :
3520 : :
3521 : : ////////////////////////////////////////////////////////////////////////
3522 : :
3523 : :
3524 : :
3525 : : // The thread that consumes file names off of the scanq. We hold
3526 : : // the persistent sqlite_ps's at this level and delegate file/archive
3527 : : // scanning to other functions.
3528 : : static void*
3529 : 128 : thread_main_scanner (void* arg)
3530 : : {
3531 : 128 : (void) arg;
3532 : :
3533 : : // all the prepared statements fit to use, the _f_ set:
3534 [ + - + - : 512 : sqlite_ps ps_f_upsert_buildids (db, "file-buildids-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
+ - + - -
- ]
3535 [ + - + - : 512 : sqlite_ps ps_f_upsert_files (db, "file-files-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
+ - + - +
- - - ]
3536 [ + - ]: 128 : sqlite_ps ps_f_upsert_de (db, "file-de-upsert",
3537 : : "insert or ignore into " BUILDIDS "_f_de "
3538 : : "(buildid, debuginfo_p, executable_p, file, mtime) "
3539 : : "values ((select id from " BUILDIDS "_buildids where hex = ?),"
3540 : : " ?,?,"
3541 [ + - + - : 512 : " (select id from " BUILDIDS "_files where name = ?), ?);");
+ - + - +
- - - ]
3542 [ + - ]: 128 : sqlite_ps ps_f_upsert_s (db, "file-s-upsert",
3543 : : "insert or ignore into " BUILDIDS "_f_s "
3544 : : "(buildid, artifactsrc, file, mtime) "
3545 : : "values ((select id from " BUILDIDS "_buildids where hex = ?),"
3546 : : " (select id from " BUILDIDS "_files where name = ?),"
3547 : : " (select id from " BUILDIDS "_files where name = ?),"
3548 [ + - + - : 512 : " ?);");
+ - + - +
- - - ]
3549 [ + - ]: 128 : sqlite_ps ps_f_query (db, "file-negativehit-find",
3550 : : "select 1 from " BUILDIDS "_file_mtime_scanned where sourcetype = 'F' "
3551 [ + - + - : 512 : "and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
+ - + - +
- - - ]
3552 [ + - ]: 128 : sqlite_ps ps_f_scan_done (db, "file-scanned",
3553 : : "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
3554 [ + - + - : 512 : "values ('F', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
+ - + - +
- - - ]
3555 : :
3556 : : // and now for the _r_ set
3557 [ + - + - : 512 : sqlite_ps ps_r_upsert_buildids (db, "rpm-buildid-intern", "insert or ignore into " BUILDIDS "_buildids VALUES (NULL, ?);");
+ - + - +
- - - ]
3558 [ + - + - : 512 : sqlite_ps ps_r_upsert_files (db, "rpm-file-intern", "insert or ignore into " BUILDIDS "_files VALUES (NULL, ?);");
+ - + - +
- - - ]
3559 [ + - ]: 128 : sqlite_ps ps_r_upsert_de (db, "rpm-de-insert",
3560 : : "insert or ignore into " BUILDIDS "_r_de (buildid, debuginfo_p, executable_p, file, mtime, content) values ("
3561 : : "(select id from " BUILDIDS "_buildids where hex = ?), ?, ?, "
3562 : : "(select id from " BUILDIDS "_files where name = ?), ?, "
3563 [ + - + - : 512 : "(select id from " BUILDIDS "_files where name = ?));");
+ - + - +
- - - ]
3564 [ + - ]: 128 : sqlite_ps ps_r_upsert_sref (db, "rpm-sref-insert",
3565 : : "insert or ignore into " BUILDIDS "_r_sref (buildid, artifactsrc) values ("
3566 : : "(select id from " BUILDIDS "_buildids where hex = ?), "
3567 [ + - + - : 512 : "(select id from " BUILDIDS "_files where name = ?));");
+ - + - +
- - - ]
3568 [ + - ]: 128 : sqlite_ps ps_r_upsert_sdef (db, "rpm-sdef-insert",
3569 : : "insert or ignore into " BUILDIDS "_r_sdef (file, mtime, content) values ("
3570 : : "(select id from " BUILDIDS "_files where name = ?), ?,"
3571 [ + - + - : 512 : "(select id from " BUILDIDS "_files where name = ?));");
+ - + - +
- - - ]
3572 [ + - ]: 128 : sqlite_ps ps_r_query (db, "rpm-negativehit-query",
3573 : : "select 1 from " BUILDIDS "_file_mtime_scanned where "
3574 [ + - + - : 512 : "sourcetype = 'R' and file = (select id from " BUILDIDS "_files where name = ?) and mtime = ?;");
+ - + - +
- - - ]
3575 [ + - ]: 128 : sqlite_ps ps_r_scan_done (db, "rpm-scanned",
3576 : : "insert or ignore into " BUILDIDS "_file_mtime_scanned (sourcetype, file, mtime, size)"
3577 [ + - + - : 384 : "values ('R', (select id from " BUILDIDS "_files where name = ?), ?, ?);");
+ - + - +
- - - ]
3578 : :
3579 : :
3580 : 128 : unsigned fts_cached = 0, fts_executable = 0, fts_debuginfo = 0, fts_sourcefiles = 0;
3581 : 128 : unsigned fts_sref = 0, fts_sdef = 0;
3582 : :
3583 [ + - + - : 256 : add_metric("thread_count", "role", "scan", 1);
+ - + - -
+ - + + -
- - - - ]
3584 [ + - + - : 256 : add_metric("thread_busy", "role", "scan", 1);
+ - + - -
+ - + - -
- - ]
3585 [ + + ]: 726 : while (! interrupted)
3586 : : {
3587 [ + - ]: 1068 : scan_payload p;
3588 : :
3589 [ + - + - : 1196 : add_metric("thread_busy", "role", "scan", -1);
+ - + - -
+ - + + -
- - - - -
- ]
3590 [ + - ]: 598 : bool gotone = scanq.wait_front(p);
3591 [ + - + - : 1196 : add_metric("thread_busy", "role", "scan", 1);
+ - + - -
+ - + + +
- - - - ]
3592 : :
3593 [ + + - + ]: 598 : if (! gotone) continue; // go back to waiting
3594 : :
3595 : 470 : try
3596 : : {
3597 : 470 : bool scan_archive = false;
3598 [ + + ]: 1120 : for (auto&& arch : scan_archives)
3599 [ + + ]: 650 : if (string_endswith(p.first, arch.first))
3600 : 351 : scan_archive = true;
3601 : :
3602 [ + + ]: 470 : if (scan_archive)
3603 [ + - ]: 351 : scan_archive_file (p.first, p.second,
3604 : : ps_r_upsert_buildids,
3605 : : ps_r_upsert_files,
3606 : : ps_r_upsert_de,
3607 : : ps_r_upsert_sref,
3608 : : ps_r_upsert_sdef,
3609 : : ps_r_query,
3610 : : ps_r_scan_done,
3611 : : fts_cached,
3612 : : fts_executable,
3613 : : fts_debuginfo,
3614 : : fts_sref,
3615 : : fts_sdef);
3616 : :
3617 [ + + ]: 470 : if (scan_files) // NB: maybe "else if" ?
3618 [ + - ]: 364 : scan_source_file (p.first, p.second,
3619 : : ps_f_upsert_buildids,
3620 : : ps_f_upsert_files,
3621 : : ps_f_upsert_de,
3622 : : ps_f_upsert_s,
3623 : : ps_f_query,
3624 : : ps_f_scan_done,
3625 : : fts_cached, fts_executable, fts_debuginfo, fts_sourcefiles);
3626 : : }
3627 [ - - ]: 0 : catch (const reportable_exception& e)
3628 : : {
3629 [ - - ]: 0 : e.report(cerr);
3630 : : }
3631 : :
3632 [ + - ]: 470 : scanq.done_front(); // let idlers run
3633 : :
3634 : 470 : if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef)
3635 : : {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed
3636 [ + - + - : 940 : (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
+ - ]
3637 [ + - + - : 940 : (void) statfs_free_enough_p(tmpdir, "tmpdir"); // this too, in case of fdcache/tmpfile usage
+ - ]
3638 : :
3639 : : // finished a scanning step -- not a "loop", because we just
3640 : : // consume the traversal loop's work, whenever
3641 [ + - + - : 940 : inc_metric("thread_work_total","role","scan");
+ - + - -
+ - + + -
- - - - -
- ]
3642 : : }
3643 : :
3644 : :
3645 [ + - + - : 256 : add_metric("thread_busy", "role", "scan", -1);
+ - + - -
+ - + - -
- - ]
3646 : 256 : return 0;
3647 : : }
3648 : :
3649 : :
3650 : :
3651 : : // The thread that traverses all the source_paths and enqueues all the
3652 : : // matching files into the file/archive scan queue.
3653 : : static void
3654 : 59 : scan_source_paths()
3655 : : {
3656 : : // NB: fedora 31 glibc/fts(3) crashes inside fts_read() on empty
3657 : : // path list.
3658 [ + + ]: 59 : if (source_paths.empty())
3659 : 1 : return;
3660 : :
3661 : : // Turn the source_paths into an fts(3)-compatible char**. Since
3662 : : // source_paths[] does not change after argv processing, the
3663 : : // c_str()'s are safe to keep around awile.
3664 : 116 : vector<const char *> sps;
3665 [ + + ]: 157 : for (auto&& sp: source_paths)
3666 [ + - ]: 99 : sps.push_back(sp.c_str());
3667 [ + - - - ]: 58 : sps.push_back(NULL);
3668 : :
3669 [ + + + - ]: 109 : FTS *fts = fts_open ((char * const *)sps.data(),
3670 : : (traverse_logical ? FTS_LOGICAL : FTS_PHYSICAL|FTS_XDEV)
3671 : : | FTS_NOCHDIR /* multithreaded */,
3672 : : NULL);
3673 [ - + ]: 58 : if (fts == NULL)
3674 [ # # # # ]: 0 : throw libc_exception(errno, "cannot fts_open");
3675 [ + - ]: 58 : defer_dtor<FTS*,int> fts_cleanup (fts, fts_close);
3676 : :
3677 : 58 : struct timespec ts_start, ts_end;
3678 : 58 : clock_gettime (CLOCK_MONOTONIC, &ts_start);
3679 : 58 : unsigned fts_scanned = 0, fts_regex = 0;
3680 : :
3681 : 1000 : FTSENT *f;
3682 [ + - + + ]: 1000 : while ((f = fts_read (fts)) != NULL)
3683 : : {
3684 [ + - ]: 942 : if (interrupted) break;
3685 : :
3686 [ - + ]: 942 : if (sigusr2 != forced_groom_count) // stop early if groom triggered
3687 : : {
3688 [ # # ]: 0 : scanq.clear(); // clear previously issued work for scanner threads
3689 : : break;
3690 : : }
3691 : :
3692 : 942 : fts_scanned ++;
3693 : :
3694 [ + - ]: 942 : if (verbose > 2)
3695 [ + - + - : 1884 : obatched(clog) << "fts traversing " << f->fts_path << endl;
+ - ]
3696 : :
3697 [ + + + + : 942 : switch (f->fts_info)
+ ]
3698 : : {
3699 : 514 : case FTS_F:
3700 : 514 : {
3701 : : /* Found a file. Convert it to an absolute path, so
3702 : : the buildid database does not have relative path
3703 : : names that are unresolvable from a subsequent run
3704 : : in a different cwd. */
3705 [ - + ]: 514 : char *rp = realpath(f->fts_path, NULL);
3706 [ - + ]: 514 : if (rp == NULL)
3707 : 0 : continue; // ignore dangling symlink or such
3708 [ + - ]: 514 : string rps = string(rp);
3709 : 514 : free (rp);
3710 : :
3711 [ + - ]: 514 : bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0);
3712 [ + - ]: 514 : bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0);
3713 [ + + ]: 514 : if (!ri || rx)
3714 : : {
3715 [ + - ]: 44 : if (verbose > 3)
3716 [ + - ]: 88 : obatched(clog) << "fts skipped by regex "
3717 [ + + + - : 48 : << (!ri ? "I" : "") << (rx ? "X" : "") << endl;
+ + + - +
- ]
3718 : 44 : fts_regex ++;
3719 [ + + ]: 44 : if (!ri)
3720 [ + - + - : 4 : inc_metric("traversed_total","type","file-skipped-I");
+ - + - -
+ - + - -
- - - - ]
3721 [ + + ]: 44 : if (rx)
3722 [ + - + - : 84 : inc_metric("traversed_total","type","file-skipped-X");
+ - + - -
+ - + - -
- - ]
3723 : : }
3724 : : else
3725 : : {
3726 [ + - + - ]: 470 : scanq.push_back (make_pair(rps, *f->fts_statp));
3727 [ + - + - : 940 : inc_metric("traversed_total","type","file");
+ - + - -
+ - + - -
- - - - ]
3728 : : }
3729 : : }
3730 : 514 : break;
3731 : :
3732 : 2 : case FTS_ERR:
3733 : 2 : case FTS_NS:
3734 : : // report on some types of errors because they may reflect fixable misconfiguration
3735 : 2 : {
3736 [ + - + - : 4 : auto x = libc_exception(f->fts_errno, string("fts traversal ") + string(f->fts_path));
+ - + - -
+ - + + -
- - - - ]
3737 [ + - ]: 2 : x.report(cerr);
3738 : : }
3739 [ + - + - : 4 : inc_metric("traversed_total","type","error");
+ - + - -
+ - + - -
- - ]
3740 : 2 : break;
3741 : :
3742 : 6 : case FTS_SL: // ignore, but count because debuginfod -L would traverse these
3743 [ + - + - : 12 : inc_metric("traversed_total","type","symlink");
+ - + - -
+ - + - -
- - ]
3744 : 6 : break;
3745 : :
3746 : 210 : case FTS_D: // ignore
3747 [ + - + - : 420 : inc_metric("traversed_total","type","directory");
+ - + - -
+ - + - -
- - ]
3748 : 210 : break;
3749 : :
3750 : 210 : default: // ignore
3751 [ + - + - : 420 : inc_metric("traversed_total","type","other");
+ - + - -
+ - + - -
- - ]
3752 : 210 : break;
3753 : : }
3754 : : }
3755 : 58 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
3756 : 58 : double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
3757 : :
3758 [ + - + - : 174 : obatched(clog) << "fts traversed source paths in " << deltas << "s, scanned=" << fts_scanned
+ - + - ]
3759 [ + - + - : 58 : << ", regex-skipped=" << fts_regex << endl;
+ - ]
3760 : : }
3761 : :
3762 : :
3763 : : static void*
3764 : 32 : thread_main_fts_source_paths (void* arg)
3765 : : {
3766 : 32 : (void) arg; // ignore; we operate on global data
3767 : :
3768 [ + - + - : 64 : set_metric("thread_tid", "role","traverse", tid());
+ - - + -
+ - - -
- ]
3769 [ + - + - : 64 : add_metric("thread_count", "role", "traverse", 1);
+ - - + -
+ - - -
- ]
3770 : :
3771 : 32 : time_t last_rescan = 0;
3772 : :
3773 [ + - ]: 154 : while (! interrupted)
3774 : : {
3775 : 154 : sleep (1);
3776 : 154 : scanq.wait_idle(); // don't start a new traversal while scanners haven't finished the job
3777 : 154 : scanq.done_idle(); // release the hounds
3778 [ + + ]: 154 : if (interrupted) break;
3779 : :
3780 : 122 : time_t now = time(NULL);
3781 : 122 : bool rescan_now = false;
3782 [ + + ]: 122 : if (last_rescan == 0) // at least one initial rescan is documented even for -t0
3783 : 31 : rescan_now = true;
3784 [ + + + + ]: 122 : if (rescan_s > 0 && (long)now > (long)(last_rescan + rescan_s))
3785 : 4 : rescan_now = true;
3786 [ + + ]: 122 : if (sigusr1 != forced_rescan_count)
3787 : : {
3788 : 31 : forced_rescan_count = sigusr1;
3789 : 31 : rescan_now = true;
3790 : : }
3791 [ + + ]: 122 : if (rescan_now)
3792 : : {
3793 [ + - + - : 118 : set_metric("thread_busy", "role","traverse", 1);
+ - - + -
+ + - - -
- - ]
3794 : 59 : try
3795 : : {
3796 [ + - ]: 59 : scan_source_paths();
3797 : : }
3798 [ - - ]: 0 : catch (const reportable_exception& e)
3799 : : {
3800 [ - - ]: 0 : e.report(cerr);
3801 : : }
3802 : 59 : last_rescan = time(NULL); // NB: now was before scanning
3803 : : // finished a traversal loop
3804 [ + - + - : 118 : inc_metric("thread_work_total", "role","traverse");
+ - - + -
+ - - -
- ]
3805 [ + - + - : 118 : set_metric("thread_busy", "role","traverse", 0);
+ - - + -
+ - - -
- ]
3806 : : }
3807 : : }
3808 : :
3809 : 32 : return 0;
3810 : : }
3811 : :
3812 : :
3813 : :
3814 : : ////////////////////////////////////////////////////////////////////////
3815 : :
3816 : : static void
3817 : 35 : database_stats_report()
3818 : : {
3819 : 35 : sqlite_ps ps_query (db, "database-overview",
3820 [ + - + - : 140 : "select label,quantity from " BUILDIDS "_stats");
+ - + - -
- ]
3821 : :
3822 [ + - + - ]: 70 : obatched(clog) << "database record counts:" << endl;
3823 : 715 : while (1)
3824 : : {
3825 [ + - ]: 375 : if (interrupted) break;
3826 [ + + ]: 375 : if (sigusr1 != forced_rescan_count) // stop early if scan triggered
3827 : : break;
3828 : :
3829 [ + - ]: 374 : int rc = ps_query.step();
3830 [ + + ]: 374 : if (rc == SQLITE_DONE) break;
3831 [ - + ]: 340 : if (rc != SQLITE_ROW)
3832 [ # # # # ]: 0 : throw sqlite_exception(rc, "step");
3833 : :
3834 [ + - ]: 340 : obatched(clog)
3835 [ + - - + : 340 : << ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL")
+ - ]
3836 : : << " "
3837 [ + - + - : 680 : << (sqlite3_column_text(ps_query, 1) ?: (const unsigned char*) "NULL")
- + + - ]
3838 : 340 : << endl;
3839 : :
3840 [ + - + - : 680 : set_metric("groom", "statistic",
+ - ]
3841 : 340 : ((const char*) sqlite3_column_text(ps_query, 0) ?: (const char*) "NULL"),
3842 [ + - - + : 782 : (sqlite3_column_double(ps_query, 1)));
+ - + - +
- - + + +
- - - - ]
3843 : 340 : }
3844 : 35 : }
3845 : :
3846 : :
3847 : : // Do a round of database grooming that might take many minutes to run.
3848 : 35 : void groom()
3849 : : {
3850 [ + - ]: 70 : obatched(clog) << "grooming database" << endl;
3851 : :
3852 : 35 : struct timespec ts_start, ts_end;
3853 : 35 : clock_gettime (CLOCK_MONOTONIC, &ts_start);
3854 : :
3855 : : // scan for files that have disappeared
3856 : 35 : sqlite_ps files (db, "check old files",
3857 : : "select distinct s.mtime, s.file, f.name from "
3858 : : BUILDIDS "_file_mtime_scanned s, " BUILDIDS "_files f "
3859 [ + - + - : 105 : "where f.id = s.file");
+ - + - -
- ]
3860 : : // NB: Because _ftime_mtime_scanned can contain both F and
3861 : : // R records for the same file, this query would return duplicates if the
3862 : : // DISTINCT qualifier were not there.
3863 [ + - ]: 35 : files.reset();
3864 : :
3865 : : // DECISION TIME - we enumerate stale fileids/mtimes
3866 [ + - ]: 70 : deque<pair<int64_t,int64_t> > stale_fileid_mtime;
3867 : :
3868 : 35 : time_t time_start = time(NULL);
3869 : 151 : while(1)
3870 : : {
3871 : : // PR28514: limit grooming iteration to O(rescan time), to avoid
3872 : : // slow filesystem tests over many files locking out rescans for
3873 : : // too long.
3874 [ + + - + ]: 93 : if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
3875 : : {
3876 [ # # # # : 0 : inc_metric("groomed_total", "decision", "aborted");
# # # # #
# # # # #
# # ]
3877 : 0 : break;
3878 : : }
3879 : :
3880 [ + - ]: 93 : if (interrupted) break;
3881 : :
3882 [ + - ]: 93 : int rc = files.step();
3883 [ + + ]: 93 : if (rc != SQLITE_ROW)
3884 : : break;
3885 : :
3886 [ + - ]: 58 : int64_t mtime = sqlite3_column_int64 (files, 0);
3887 [ + - ]: 58 : int64_t fileid = sqlite3_column_int64 (files, 1);
3888 [ + - - + ]: 58 : const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: "");
3889 : 58 : struct stat s;
3890 : 58 : bool regex_file_drop = 0;
3891 : :
3892 [ + + ]: 58 : if (regex_groom)
3893 : : {
3894 [ + - ]: 8 : bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0);
3895 [ + - ]: 8 : bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0);
3896 : 8 : regex_file_drop = !reg_include || reg_exclude; // match logic of scan_source_paths
3897 : : }
3898 : :
3899 : 58 : rc = stat(filename, &s);
3900 [ + + - + ]: 58 : if ( regex_file_drop || rc < 0 || (mtime != (int64_t) s.st_mtime) )
3901 : : {
3902 [ + - ]: 12 : if (verbose > 2)
3903 [ + - + - : 24 : obatched(clog) << "groom: stale file=" << filename << " mtime=" << mtime << endl;
+ - + - +
- ]
3904 [ + - ]: 12 : stale_fileid_mtime.push_back(make_pair(fileid,mtime));
3905 [ + - + - : 24 : inc_metric("groomed_total", "decision", "stale");
+ - + - -
+ - + + -
- - - - ]
3906 [ + - + - : 24 : set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+ - + - -
+ - + - -
- - ]
3907 : : }
3908 : : else
3909 [ + - + - : 92 : inc_metric("groomed_total", "decision", "fresh");
+ - + - -
+ - + - -
- - ]
3910 : :
3911 [ + - ]: 58 : if (sigusr1 != forced_rescan_count) // stop early if scan triggered
3912 : : break;
3913 : 58 : }
3914 [ + - ]: 35 : files.reset();
3915 : :
3916 : : // ACTION TIME
3917 : :
3918 : : // Now that we know which file/mtime tuples are stale, actually do
3919 : : // the deletion from the database. Doing this during the SELECT
3920 : : // iteration above results in undefined behaviour in sqlite, as per
3921 : : // https://www.sqlite.org/isolation.html
3922 : :
3923 : : // We could shuffle stale_fileid_mtime[] here. It'd let aborted
3924 : : // sequences of nuke operations resume at random locations, instead
3925 : : // of just starting over. But it doesn't matter much either way,
3926 : : // as long as we make progress.
3927 : :
3928 [ + - + - : 140 : sqlite_ps files_del_f_de (db, "nuke f_de", "delete from " BUILDIDS "_f_de where file = ? and mtime = ?");
+ - + - +
- - - ]
3929 [ + - + - : 140 : sqlite_ps files_del_r_de (db, "nuke r_de", "delete from " BUILDIDS "_r_de where file = ? and mtime = ?");
+ - + - +
- - - ]
3930 [ + - ]: 35 : sqlite_ps files_del_scan (db, "nuke f_m_s", "delete from " BUILDIDS "_file_mtime_scanned "
3931 [ + - + - : 140 : "where file = ? and mtime = ?");
+ - + - -
- ]
3932 : :
3933 [ + + ]: 47 : while (! stale_fileid_mtime.empty())
3934 : : {
3935 : 12 : auto stale = stale_fileid_mtime.front();
3936 : 12 : stale_fileid_mtime.pop_front();
3937 [ + - + - : 24 : set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+ - + - -
+ - + - +
- - - - ]
3938 : :
3939 : : // PR28514: limit grooming iteration to O(rescan time), to avoid
3940 : : // slow nuke_* queries over many files locking out rescans for too
3941 : : // long. We iterate over the files in random() sequence to avoid
3942 : : // partial checks going over the same set.
3943 [ - + - - ]: 12 : if (rescan_s > 0 && (long)time(NULL) > (long)(time_start + rescan_s))
3944 : : {
3945 [ # # # # : 0 : inc_metric("groomed_total", "action", "aborted");
# # # # #
# # # # #
# # ]
3946 : 0 : break;
3947 : : }
3948 : :
3949 [ + - ]: 12 : if (interrupted) break;
3950 : :
3951 : 12 : int64_t fileid = stale.first;
3952 : 12 : int64_t mtime = stale.second;
3953 [ + - + - : 12 : files_del_f_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ - + - ]
3954 [ + - + - : 12 : files_del_r_de.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ - + - ]
3955 [ + - + - : 12 : files_del_scan.reset().bind(1,fileid).bind(2,mtime).step_ok_done();
+ - + - ]
3956 [ + - + - : 24 : inc_metric("groomed_total", "action", "cleaned");
+ - + - -
+ - + + -
- - - - ]
3957 : :
3958 [ + - ]: 12 : if (sigusr1 != forced_rescan_count) // stop early if scan triggered
3959 : : break;
3960 : : }
3961 : 35 : stale_fileid_mtime.clear(); // no need for this any longer
3962 [ + - + - : 70 : set_metric("thread_work_pending","role","groom", stale_fileid_mtime.size());
+ - + - -
+ - + + -
- - - - ]
3963 : :
3964 : : // delete buildids with no references in _r_de or _f_de tables;
3965 : : // cascades to _r_sref & _f_s records
3966 [ + - ]: 35 : sqlite_ps buildids_del (db, "nuke orphan buildids",
3967 : : "delete from " BUILDIDS "_buildids "
3968 : : "where not exists (select 1 from " BUILDIDS "_f_de d where " BUILDIDS "_buildids.id = d.buildid) "
3969 [ + - + - : 140 : "and not exists (select 1 from " BUILDIDS "_r_de d where " BUILDIDS "_buildids.id = d.buildid)");
+ - + - +
- - - ]
3970 [ + - + - ]: 35 : buildids_del.reset().step_ok_done();
3971 : :
3972 [ - + ]: 35 : if (interrupted) return;
3973 : :
3974 : : // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
3975 [ + - + - : 140 : sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
+ - + - +
- - - ]
3976 [ + - + - ]: 35 : g1.reset().step_ok_done();
3977 [ + - + - : 105 : sqlite_ps g2 (db, "optimize", "pragma optimize");
+ - - + +
- - - ]
3978 [ + - + - ]: 35 : g2.reset().step_ok_done();
3979 [ + - + - : 105 : sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
+ - + - +
- - - ]
3980 [ + - + - ]: 35 : g3.reset().step_ok_done();
3981 : :
3982 [ + - ]: 35 : database_stats_report();
3983 : :
3984 [ + - + - : 70 : (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size
+ - ]
3985 : :
3986 [ + - ]: 35 : sqlite3_db_release_memory(db); // shrink the process if possible
3987 [ + - ]: 35 : sqlite3_db_release_memory(dbq); // ... for both connections
3988 [ + - ]: 35 : debuginfod_pool_groom(); // and release any debuginfod_client objects we've been holding onto
3989 : :
3990 [ + - ]: 35 : fdcache.limit(0,0,0,0); // release the fdcache contents
3991 [ + - ]: 35 : fdcache.limit(fdcache_fds, fdcache_mbs, fdcache_prefetch_fds, fdcache_prefetch_mbs); // restore status quo parameters
3992 : :
3993 : 35 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
3994 : 35 : double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
3995 : :
3996 [ + - + - : 70 : obatched(clog) << "groomed database in " << deltas << "s" << endl;
+ - + - ]
3997 : : }
3998 : :
3999 : :
4000 : : static void*
4001 : 35 : thread_main_groom (void* /*arg*/)
4002 : : {
4003 [ + - + - : 70 : set_metric("thread_tid", "role", "groom", tid());
+ - - + -
+ - - -
- ]
4004 [ + - + - : 70 : add_metric("thread_count", "role", "groom", 1);
+ - - + -
+ - - -
- ]
4005 : :
4006 : 35 : time_t last_groom = 0;
4007 : :
4008 : 277 : while (1)
4009 : : {
4010 : 156 : sleep (1);
4011 : 156 : scanq.wait_idle(); // PR25394: block scanners during grooming!
4012 [ + + ]: 156 : if (interrupted) break;
4013 : :
4014 : 121 : time_t now = time(NULL);
4015 : 121 : bool groom_now = false;
4016 [ + + ]: 121 : if (last_groom == 0) // at least one initial groom is documented even for -g0
4017 : 32 : groom_now = true;
4018 [ + + + + ]: 121 : if (groom_s > 0 && (long)now > (long)(last_groom + groom_s))
4019 : 4 : groom_now = true;
4020 [ + + ]: 121 : if (sigusr2 != forced_groom_count)
4021 : : {
4022 : 3 : forced_groom_count = sigusr2;
4023 : 3 : groom_now = true;
4024 : : }
4025 [ + + ]: 121 : if (groom_now)
4026 : : {
4027 [ + - + - : 70 : set_metric("thread_busy", "role", "groom", 1);
+ - - + -
+ + - - -
- - ]
4028 : 35 : try
4029 : : {
4030 [ + - ]: 35 : groom ();
4031 : : }
4032 [ - - ]: 0 : catch (const sqlite_exception& e)
4033 : : {
4034 [ - - - - : 0 : obatched(cerr) << e.message << endl;
- - ]
4035 : : }
4036 : 35 : last_groom = time(NULL); // NB: now was before grooming
4037 : : // finished a grooming loop
4038 [ + - + - : 70 : inc_metric("thread_work_total", "role", "groom");
+ - - + -
+ - - -
- ]
4039 [ + - + - : 70 : set_metric("thread_busy", "role", "groom", 0);
+ - - + -
+ - - -
- ]
4040 : : }
4041 : :
4042 : 121 : scanq.done_idle();
4043 : 121 : }
4044 : :
4045 : 35 : return 0;
4046 : : }
4047 : :
4048 : :
4049 : : ////////////////////////////////////////////////////////////////////////
4050 : :
4051 : :
4052 : : static void
4053 : 36 : signal_handler (int /* sig */)
4054 : : {
4055 : 36 : interrupted ++;
4056 : :
4057 [ + + ]: 36 : if (db)
4058 : 35 : sqlite3_interrupt (db);
4059 [ + - ]: 36 : if (dbq)
4060 : 36 : sqlite3_interrupt (dbq);
4061 : :
4062 : : // NB: don't do anything else in here
4063 : 36 : }
4064 : :
4065 : : static void
4066 : 31 : sigusr1_handler (int /* sig */)
4067 : : {
4068 : 31 : sigusr1 ++;
4069 : : // NB: don't do anything else in here
4070 : 31 : }
4071 : :
4072 : : static void
4073 : 3 : sigusr2_handler (int /* sig */)
4074 : : {
4075 : 3 : sigusr2 ++;
4076 : : // NB: don't do anything else in here
4077 : 3 : }
4078 : :
4079 : :
4080 : : static void // error logging callback from libmicrohttpd internals
4081 : 0 : error_cb (void *arg, const char *fmt, va_list ap)
4082 : : {
4083 : 0 : (void) arg;
4084 [ # # # # : 0 : inc_metric("error_count","libmicrohttpd",fmt);
# # # # #
# # # #
# ]
4085 : 0 : char errmsg[512];
4086 : 0 : (void) vsnprintf (errmsg, sizeof(errmsg), fmt, ap); // ok if slightly truncated
4087 [ # # ]: 0 : obatched(cerr) << "libmicrohttpd error: " << errmsg; // MHD_DLOG calls already include \n
4088 : 0 : }
4089 : :
4090 : :
4091 : : // A user-defined sqlite function, to score the sharedness of the
4092 : : // prefix of two strings. This is used to compare candidate debuginfo
4093 : : // / source-rpm names, so that the closest match
4094 : : // (directory-topology-wise closest) is found. This is important in
4095 : : // case the same sref (source file name) is in many -debuginfo or
4096 : : // -debugsource RPMs, such as when multiple versions/releases of the
4097 : : // same package are in the database.
4098 : :
4099 : 148 : static void sqlite3_sharedprefix_fn (sqlite3_context* c, int argc, sqlite3_value** argv)
4100 : : {
4101 [ - + ]: 148 : if (argc != 2)
4102 : 0 : sqlite3_result_error(c, "expect 2 string arguments", -1);
4103 [ + - + + ]: 296 : else if ((sqlite3_value_type(argv[0]) != SQLITE_TEXT) ||
4104 : 148 : (sqlite3_value_type(argv[1]) != SQLITE_TEXT))
4105 : 3 : sqlite3_result_null(c);
4106 : : else
4107 : : {
4108 : 145 : const unsigned char* a = sqlite3_value_text (argv[0]);
4109 : 145 : const unsigned char* b = sqlite3_value_text (argv[1]);
4110 : 145 : int i = 0;
4111 [ + + + - : 17794 : while (*a != '\0' && *b != '\0' && *a++ == *b++)
+ + + + ]
4112 : 17524 : i++;
4113 : 145 : sqlite3_result_int (c, i);
4114 : : }
4115 : 148 : }
4116 : :
4117 : :
// Convert a soft open-file limit into the number of threads it can
// support: 100 descriptors are set aside for shared libraries and
// such, and each thread is budgeted 4.  The result is clamped to
// [1, UINT_MAX], so limits below the reserve and very large limits
// (e.g. RLIM_INFINITY) both yield sensible values instead of wrapping
// or being truncated.
static unsigned
concurrency_for_fd_limit (unsigned long long nofile_soft)
{
  const unsigned long long reserved_fds = 100;
  const unsigned long long fds_per_thread = 4;

  if (nofile_soft <= reserved_fds)
    return 1;

  const unsigned long long budget = (nofile_soft - reserved_fds) / fds_per_thread;
  if (budget < 1)
    return 1;
  if (budget > ~0U)
    return ~0U;
  return (unsigned) budget;
}


// Pick a default thread count: the smallest of the hardware
// concurrency, the CPU affinity mask size (if HAVE_SCHED_GETAFFINITY)
// and the open-file budget (if HAVE_GETRLIMIT), each floored at 1.
static unsigned
default_concurrency() // guaranteed >= 1
{
  // Prior to PR29975 & PR29976, we'd just use this:
  unsigned sth = std::thread::hardware_concurrency();
  // ... but on many-CPU boxes, admins or distros may throttle
  // resources in such a way that debuginfod would mysteriously fail.
  // So we reduce the defaults:

  unsigned aff = 0;
#ifdef HAVE_SCHED_GETAFFINITY
  {
    int ret;
    cpu_set_t mask;
    CPU_ZERO(&mask);
    ret = sched_getaffinity(0, sizeof(mask), &mask);
    if (ret == 0)
      aff = CPU_COUNT(&mask);
  }
#endif

  unsigned fn = 0;
#ifdef HAVE_GETRLIMIT
  {
    struct rlimit rlim;
    int rc = getrlimit(RLIMIT_NOFILE, &rlim);
    if (rc == 0)
      fn = concurrency_for_fd_limit (rlim.rlim_cur);
    // at least 2 fds are used by each listener thread etc.
    // plus a bunch to account for shared libraries and such
  }
#endif

  unsigned d = std::min(std::max(sth, 1U),
                        std::min(std::max(aff, 1U),
                                 std::max(fn, 1U)));
  return d;
}
4156 : :
4157 : :
4158 : :
4159 : : int
4160 : 36 : main (int argc, char *argv[])
4161 : : {
4162 : 36 : (void) setlocale (LC_ALL, "");
4163 : 36 : (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
4164 : 36 : (void) textdomain (PACKAGE_TARNAME);
4165 : :
4166 : : /* Tell the library which version we are expecting. */
4167 : 36 : elf_version (EV_CURRENT);
4168 : :
4169 [ + - - + ]: 72 : tmpdir = string(getenv("TMPDIR") ?: "/tmp");
4170 : :
4171 : : /* Set computed default values. */
4172 [ - + + - : 72 : db_path = string(getenv("HOME") ?: "/") + string("/.debuginfod.sqlite"); /* XDG? */
+ - - + -
+ + - -
- ]
4173 : 36 : int rc = regcomp (& file_include_regex, ".*", REG_EXTENDED|REG_NOSUB); // match everything
4174 [ - + ]: 36 : if (rc != 0)
4175 : 0 : error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
4176 : 36 : rc = regcomp (& file_exclude_regex, "^$", REG_EXTENDED|REG_NOSUB); // match nothing
4177 [ - + ]: 36 : if (rc != 0)
4178 : 0 : error (EXIT_FAILURE, 0, "regcomp failure: %d", rc);
4179 : :
4180 : : // default parameters for fdcache are computed from system stats
4181 : 36 : struct statfs sfs;
4182 : 36 : rc = statfs(tmpdir.c_str(), &sfs);
4183 [ - + ]: 36 : if (rc < 0)
4184 : 0 : fdcache_mbs = 1024; // 1 gigabyte
4185 : : else
4186 : 36 : fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
4187 : 36 : fdcache_mintmp = 25; // emergency flush at 25% remaining (75% full)
4188 : 36 : fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
4189 : 36 : fdcache_fds = (concurrency + fdcache_prefetch) * 2;
4190 : :
4191 : : /* Parse and process arguments. */
4192 : 36 : int remaining;
4193 : 36 : (void) argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &remaining, NULL);
4194 [ - + ]: 36 : if (remaining != argc)
4195 : 0 : error (EXIT_FAILURE, 0,
4196 : 0 : "unexpected argument: %s", argv[remaining]);
4197 : :
4198 : : // Make the prefetch cache spaces a fraction of the main fdcache if
4199 : : // unspecified.
4200 [ + + ]: 36 : if (fdcache_prefetch_fds == 0)
4201 : 35 : fdcache_prefetch_fds = fdcache_fds / 2;
4202 [ + + ]: 36 : if (fdcache_prefetch_mbs == 0)
4203 : 35 : fdcache_prefetch_mbs = fdcache_mbs / 2;
4204 : :
4205 [ + + + + : 36 : if (scan_archives.size()==0 && !scan_files && source_paths.size()>0)
- + ]
4206 [ # # ]: 0 : obatched(clog) << "warning: without -F -R -U -Z, ignoring PATHs" << endl;
4207 : :
4208 : 36 : fdcache.limit(fdcache_fds, fdcache_mbs, fdcache_prefetch_fds, fdcache_prefetch_mbs);
4209 : :
4210 : 36 : (void) signal (SIGPIPE, SIG_IGN); // microhttpd can generate it incidentally, ignore
4211 : 36 : (void) signal (SIGINT, signal_handler); // ^C
4212 : 36 : (void) signal (SIGHUP, signal_handler); // EOF
4213 : 36 : (void) signal (SIGTERM, signal_handler); // systemd
4214 : 36 : (void) signal (SIGUSR1, sigusr1_handler); // end-user
4215 : 36 : (void) signal (SIGUSR2, sigusr2_handler); // end-user
4216 : :
4217 : : /* Get database ready. */
4218 [ + + ]: 36 : if (! passive_p)
4219 : : {
4220 : 35 : rc = sqlite3_open_v2 (db_path.c_str(), &db, (SQLITE_OPEN_READWRITE
4221 : : |SQLITE_OPEN_URI
4222 : : |SQLITE_OPEN_PRIVATECACHE
4223 : : |SQLITE_OPEN_CREATE
4224 : : |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
4225 : : NULL);
4226 [ - + ]: 35 : if (rc == SQLITE_CORRUPT)
4227 : : {
4228 : 0 : (void) unlink (db_path.c_str());
4229 : 0 : error (EXIT_FAILURE, 0,
4230 : : "cannot open %s, deleted database: %s", db_path.c_str(), sqlite3_errmsg(db));
4231 : : }
4232 [ - + ]: 35 : else if (rc)
4233 : : {
4234 : 0 : error (EXIT_FAILURE, 0,
4235 : : "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(db));
4236 : : }
4237 : : }
4238 : :
4239 : : // open the readonly query variant
4240 : : // NB: PRIVATECACHE allows web queries to operate in parallel with
4241 : : // much other grooming/scanning operation.
4242 : 36 : rc = sqlite3_open_v2 (db_path.c_str(), &dbq, (SQLITE_OPEN_READONLY
4243 : : |SQLITE_OPEN_URI
4244 : : |SQLITE_OPEN_PRIVATECACHE
4245 : : |SQLITE_OPEN_FULLMUTEX), /* thread-safe */
4246 : : NULL);
4247 [ - + ]: 36 : if (rc)
4248 : : {
4249 : 0 : error (EXIT_FAILURE, 0,
4250 : : "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(dbq));
4251 : : }
4252 : :
4253 : :
4254 [ + - ]: 72 : obatched(clog) << "opened database " << db_path
4255 [ + + + - : 37 : << (db?" rw":"") << (dbq?" ro":"") << endl;
- + + - +
- ]
4256 [ + - + - ]: 72 : obatched(clog) << "sqlite version " << sqlite3_version << endl;
4257 [ + + + - : 107 : obatched(clog) << "service mode " << (passive_p ? "passive":"active") << endl;
+ - ]
4258 : :
4259 : : // add special string-prefix-similarity function used in rpm sref/sdef resolution
4260 : 36 : rc = sqlite3_create_function(dbq, "sharedprefix", 2, SQLITE_UTF8, NULL,
4261 : : & sqlite3_sharedprefix_fn, NULL, NULL);
4262 [ - + ]: 36 : if (rc != SQLITE_OK)
4263 : 0 : error (EXIT_FAILURE, 0,
4264 : : "cannot create sharedprefix function: %s", sqlite3_errmsg(dbq));
4265 : :
4266 [ + + ]: 36 : if (! passive_p)
4267 : : {
4268 [ + + ]: 35 : if (verbose > 3)
4269 [ + - + - ]: 44 : obatched(clog) << "ddl: " << DEBUGINFOD_SQLITE_DDL << endl;
4270 : 35 : rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_DDL, NULL, NULL, NULL);
4271 [ - + ]: 35 : if (rc != SQLITE_OK)
4272 : : {
4273 : 0 : error (EXIT_FAILURE, 0,
4274 : : "cannot run database schema ddl: %s", sqlite3_errmsg(db));
4275 : : }
4276 : : }
4277 : :
4278 [ + - + - : 72 : obatched(clog) << "libmicrohttpd version " << MHD_get_version() << endl;
+ - ]
4279 : :
4280 : : /* If '-C' wasn't given or was given with no arg, pick a reasonable default
4281 : : for the number of worker threads. */
4282 [ + + ]: 36 : if (connection_pool == 0)
4283 : 34 : connection_pool = default_concurrency();
4284 : :
4285 : : /* Note that MHD_USE_EPOLL and MHD_USE_THREAD_PER_CONNECTION don't
4286 : : work together. */
4287 : 36 : unsigned int use_epoll = 0;
4288 : : #if MHD_VERSION >= 0x00095100
4289 : 36 : use_epoll = MHD_USE_EPOLL;
4290 : : #endif
4291 : :
4292 : 36 : unsigned int mhd_flags = (
4293 : : #if MHD_VERSION >= 0x00095300
4294 : : MHD_USE_INTERNAL_POLLING_THREAD
4295 : : #else
4296 : : MHD_USE_SELECT_INTERNALLY
4297 : : #endif
4298 : : | MHD_USE_DUAL_STACK
4299 : : | use_epoll
4300 : : #if MHD_VERSION >= 0x00095200
4301 : : | MHD_USE_ITC
4302 : : #endif
4303 : : | MHD_USE_DEBUG); /* report errors to stderr */
4304 : :
4305 : : // Start httpd server threads. Use a single dual-homed pool.
4306 : 36 : MHD_Daemon *d46 = MHD_start_daemon (mhd_flags, http_port,
4307 : : NULL, NULL, /* default accept policy */
4308 : : handler_cb, NULL, /* handler callback */
4309 : : MHD_OPTION_EXTERNAL_LOGGER,
4310 : : error_cb, NULL,
4311 : : MHD_OPTION_THREAD_POOL_SIZE,
4312 : : (int)connection_pool,
4313 : : MHD_OPTION_END);
4314 : :
4315 : 36 : MHD_Daemon *d4 = NULL;
4316 [ - + ]: 36 : if (d46 == NULL)
4317 : : {
4318 : : // Cannot use dual_stack, use ipv4 only
4319 : 0 : mhd_flags &= ~(MHD_USE_DUAL_STACK);
4320 [ # # ]: 0 : d4 = MHD_start_daemon (mhd_flags, http_port,
4321 : : NULL, NULL, /* default accept policy */
4322 : : handler_cb, NULL, /* handler callback */
4323 : : MHD_OPTION_EXTERNAL_LOGGER,
4324 : : error_cb, NULL,
4325 : : (connection_pool
4326 : : ? MHD_OPTION_THREAD_POOL_SIZE
4327 : : : MHD_OPTION_END),
4328 : : (connection_pool
4329 : : ? (int)connection_pool
4330 : : : MHD_OPTION_END),
4331 : : MHD_OPTION_END);
4332 [ # # ]: 0 : if (d4 == NULL)
4333 : : {
4334 : 0 : sqlite3 *database = db;
4335 : 0 : sqlite3 *databaseq = dbq;
4336 : 0 : db = dbq = 0; // for signal_handler not to freak
4337 : 0 : sqlite3_close (databaseq);
4338 : 0 : sqlite3_close (database);
4339 : 0 : error (EXIT_FAILURE, 0, "cannot start http server at port %d",
4340 : : http_port);
4341 : : }
4342 : :
4343 : : }
4344 : 36 : obatched(clog) << "started http server on"
4345 : : << (d4 != NULL ? " IPv4 " : " IPv4 IPv6 ")
4346 [ + - + - : 72 : << "port=" << http_port << endl;
+ - + - +
- ]
4347 : :
4348 : : // add maxigroom sql if -G given
4349 [ - + ]: 36 : if (maxigroom)
4350 : : {
4351 [ # # ]: 0 : obatched(clog) << "maxigrooming database, please wait." << endl;
4352 [ # # ]: 0 : extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
4353 [ # # ]: 0 : extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
4354 [ # # ]: 0 : extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");
4355 : :
4356 : : // NB: we don't maxigroom the _files interning table. It'd require a temp index on all the
4357 : : // tables that have file foreign-keys, which is a lot.
4358 : :
4359 : : // NB: with =delete, may take up 3x disk space total during vacuum process
4360 : : // vs. =off (only 2x but may corrupt database if program dies mid-vacuum)
4361 : : // vs. =wal (>3x observed, but safe)
4362 [ # # ]: 0 : extra_ddl.push_back("pragma journal_mode=delete;");
4363 [ # # ]: 0 : extra_ddl.push_back("vacuum;");
4364 [ # # ]: 0 : extra_ddl.push_back("pragma journal_mode=wal;");
4365 : : }
4366 : :
4367 : : // run extra -D sql if given
4368 [ + + ]: 36 : if (! passive_p)
4369 [ - + ]: 35 : for (auto&& i: extra_ddl)
4370 : : {
4371 [ # # ]: 0 : if (verbose > 1)
4372 [ # # # # ]: 0 : obatched(clog) << "extra ddl:\n" << i << endl;
4373 : 0 : rc = sqlite3_exec (db, i.c_str(), NULL, NULL, NULL);
4374 [ # # # # ]: 0 : if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
4375 : 0 : error (0, 0,
4376 : : "warning: cannot run database extra ddl %s: %s", i.c_str(), sqlite3_errmsg(db));
4377 : :
4378 [ # # ]: 0 : if (maxigroom)
4379 [ # # ]: 0 : obatched(clog) << "maxigroomed database" << endl;
4380 : : }
4381 : :
4382 [ + + ]: 36 : if (! passive_p)
4383 [ + - + - ]: 70 : obatched(clog) << "search concurrency " << concurrency << endl;
4384 : 36 : obatched(clog) << "webapi connection pool " << connection_pool
4385 [ + - - + : 36 : << (connection_pool ? "" : " (unlimited)") << endl;
+ - + - ]
4386 [ + + ]: 36 : if (! passive_p)
4387 [ + - + - ]: 70 : obatched(clog) << "rescan time " << rescan_s << endl;
4388 [ + - + - ]: 72 : obatched(clog) << "fdcache fds " << fdcache_fds << endl;
4389 [ + - + - ]: 72 : obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
4390 [ + - + - ]: 72 : obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
4391 [ + - + - ]: 72 : obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
4392 [ + - + - ]: 72 : obatched(clog) << "fdcache tmpdir min% " << fdcache_mintmp << endl;
4393 [ + + ]: 36 : if (! passive_p)
4394 [ + - + - ]: 70 : obatched(clog) << "groom time " << groom_s << endl;
4395 [ + - + - ]: 72 : obatched(clog) << "prefetch fds " << fdcache_prefetch_fds << endl;
4396 [ + - + - ]: 72 : obatched(clog) << "prefetch mbs " << fdcache_prefetch_mbs << endl;
4397 [ + - + - ]: 72 : obatched(clog) << "forwarded ttl limit " << forwarded_ttl_limit << endl;
4398 : :
4399 [ + + ]: 36 : if (scan_archives.size()>0)
4400 : : {
4401 : 25 : obatched ob(clog);
4402 [ + - ]: 25 : auto& o = ob << "accepting archive types ";
4403 [ + + ]: 76 : for (auto&& arch : scan_archives)
4404 [ + - + - : 51 : o << arch.first << "(" << arch.second << ") ";
+ - + - ]
4405 [ + - ]: 25 : o << endl;
4406 : : }
4407 : 36 : const char* du = getenv(DEBUGINFOD_URLS_ENV_VAR);
4408 [ + + + + ]: 36 : if (du && du[0] != '\0') // set to non-empty string?
4409 [ + - + - ]: 14 : obatched(clog) << "upstream debuginfod servers: " << du << endl;
4410 : :
4411 [ + + ]: 72 : vector<pthread_t> all_threads;
4412 : :
4413 [ + + ]: 36 : if (! passive_p)
4414 : : {
4415 : 35 : pthread_t pt;
4416 : 35 : rc = pthread_create (& pt, NULL, thread_main_groom, NULL);
4417 [ - + ]: 35 : if (rc)
4418 : 0 : error (EXIT_FAILURE, rc, "cannot spawn thread to groom database\n");
4419 : : else
4420 : : {
4421 : : #ifdef HAVE_PTHREAD_SETNAME_NP
4422 : 35 : (void) pthread_setname_np (pt, "groom");
4423 : : #endif
4424 [ + - ]: 35 : all_threads.push_back(pt);
4425 : : }
4426 : :
4427 [ + + + + ]: 35 : if (scan_files || scan_archives.size() > 0)
4428 : : {
4429 : 32 : rc = pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL);
4430 [ - + ]: 32 : if (rc)
4431 : 0 : error (EXIT_FAILURE, rc, "cannot spawn thread to traverse source paths\n");
4432 : : #ifdef HAVE_PTHREAD_SETNAME_NP
4433 : 32 : (void) pthread_setname_np (pt, "traverse");
4434 : : #endif
4435 [ + - ]: 32 : all_threads.push_back(pt);
4436 : :
4437 [ + + ]: 160 : for (unsigned i=0; i<concurrency; i++)
4438 : : {
4439 : 128 : rc = pthread_create (& pt, NULL, thread_main_scanner, NULL);
4440 [ - + ]: 128 : if (rc)
4441 : 0 : error (EXIT_FAILURE, rc, "cannot spawn thread to scan source files / archives\n");
4442 : : #ifdef HAVE_PTHREAD_SETNAME_NP
4443 : 128 : (void) pthread_setname_np (pt, "scan");
4444 : : #endif
4445 [ + - ]: 128 : all_threads.push_back(pt);
4446 : : }
4447 : : }
4448 : : }
4449 : :
4450 : : /* Trivial main loop! */
4451 [ + - + - : 72 : set_metric("ready", 1);
- - ]
4452 [ + + ]: 106 : while (! interrupted)
4453 [ + - ]: 70 : pause ();
4454 [ + - ]: 36 : scanq.nuke(); // wake up any remaining scanq-related threads, let them die
4455 [ + - + - : 72 : set_metric("ready", 0);
+ - ]
4456 : :
4457 [ + - ]: 36 : if (verbose)
4458 [ + - + - : 72 : obatched(clog) << "stopping" << endl;
- - ]
4459 : :
4460 : : /* Join all our threads. */
4461 [ + + ]: 231 : for (auto&& it : all_threads)
4462 [ + - ]: 195 : pthread_join (it, NULL);
4463 : :
4464 : : /* Stop all the web service threads. */
4465 [ + - + - ]: 36 : if (d46) MHD_stop_daemon (d46);
4466 [ - + - - ]: 36 : if (d4) MHD_stop_daemon (d4);
4467 : :
4468 [ + + ]: 36 : if (! passive_p)
4469 : : {
4470 : : /* With all threads known dead, we can clean up the global resources. */
4471 [ + - ]: 35 : rc = sqlite3_exec (db, DEBUGINFOD_SQLITE_CLEANUP_DDL, NULL, NULL, NULL);
4472 [ - + ]: 35 : if (rc != SQLITE_OK)
4473 : : {
4474 [ # # # # ]: 0 : error (0, 0,
4475 : : "warning: cannot run database cleanup ddl: %s", sqlite3_errmsg(db));
4476 : : }
4477 : : }
4478 : :
4479 [ + - ]: 36 : debuginfod_pool_groom ();
4480 : :
4481 : : // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
4482 [ + - ]: 36 : (void) regfree (& file_include_regex);
4483 [ + - ]: 36 : (void) regfree (& file_exclude_regex);
4484 : :
4485 : 36 : sqlite3 *database = db;
4486 : 36 : sqlite3 *databaseq = dbq;
4487 : 36 : db = dbq = 0; // for signal_handler not to freak
4488 [ + - ]: 36 : (void) sqlite3_close (databaseq);
4489 [ + + ]: 36 : if (! passive_p)
4490 [ + - ]: 35 : (void) sqlite3_close (database);
4491 : :
4492 [ + + ]: 36 : return 0;
4493 : : }
|