Branch data Line data Source code
1 : : /* Debuginfo-over-http server.
2 : : Copyright (C) 2019-2024 Red Hat, Inc.
3 : : Copyright (C) 2021, 2022 Mark J. Wielaard <mark@klomp.org>
4 : : This file is part of elfutils.
5 : :
6 : : This file is free software; you can redistribute it and/or modify
7 : : it under the terms of the GNU General Public License as published by
8 : : the Free Software Foundation; either version 3 of the License, or
9 : : (at your option) any later version.
10 : :
11 : : elfutils is distributed in the hope that it will be useful, but
12 : : WITHOUT ANY WARRANTY; without even the implied warranty of
13 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : : GNU General Public License for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 : :
19 : :
20 : : /* cargo-cult from libdwfl linux-kernel-modules.c */
21 : : /* In case we have a bad fts we include this before config.h because it
22 : : can't handle _FILE_OFFSET_BITS.
23 : : Everything we need here is fine if its declarations just come first.
24 : : Also, include sys/types.h before fts. On some systems fts.h is not self
25 : : contained. */
26 : : #ifdef BAD_FTS
27 : : #include <sys/types.h>
28 : : #include <fts.h>
29 : : #endif
30 : :
31 : : #ifdef HAVE_CONFIG_H
32 : : #include "config.h"
33 : : #endif
34 : :
35 : : // #define _GNU_SOURCE
36 : : #ifdef HAVE_SCHED_H
37 : : extern "C" {
38 : : #include <sched.h>
39 : : }
40 : : #endif
41 : : #ifdef HAVE_SYS_RESOURCE_H
42 : : extern "C" {
43 : : #include <sys/resource.h>
44 : : }
45 : : #endif
46 : :
47 : : #ifdef HAVE_EXECINFO_H
48 : : extern "C" {
49 : : #include <execinfo.h>
50 : : }
51 : : #endif
52 : : #ifdef HAVE_MALLOC_H
53 : : extern "C" {
54 : : #include <malloc.h>
55 : : }
56 : : #endif
57 : :
58 : : #include "debuginfod.h"
59 : : #include <dwarf.h>
60 : :
61 : : #include <argp.h>
62 : : #ifdef __GNUC__
63 : : #undef __attribute__ /* glibc bug - rhbz 1763325 */
64 : : #endif
65 : :
66 : : #ifdef USE_LZMA
67 : : #include <lzma.h>
68 : : #endif
69 : :
70 : : #include <unistd.h>
71 : : #include <stdlib.h>
72 : : #include <locale.h>
73 : : #include <pthread.h>
74 : : #include <signal.h>
75 : : #include <sys/stat.h>
76 : : #include <sys/time.h>
77 : : #include <sys/vfs.h>
78 : : #include <unistd.h>
79 : : #include <fcntl.h>
80 : : #include <netdb.h>
81 : : #include <math.h>
82 : : #include <float.h>
83 : : #include <fnmatch.h>
84 : :
85 : :
86 : : /* If fts.h is included before config.h, its indirect inclusions may not
87 : : give us the right LFS aliases of these functions, so map them manually. */
88 : : #ifdef BAD_FTS
89 : : #ifdef _FILE_OFFSET_BITS
90 : : #define open open64
91 : : #define fopen fopen64
92 : : #endif
93 : : #else
94 : : #include <sys/types.h>
95 : : #include <fts.h>
96 : : #endif
97 : :
98 : : #include <cstring>
99 : : #include <vector>
100 : : #include <set>
101 : : #include <unordered_set>
102 : : #include <map>
103 : : #include <string>
104 : : #include <iostream>
105 : : #include <iomanip>
106 : : #include <ostream>
107 : : #include <sstream>
108 : : #include <mutex>
109 : : #include <deque>
110 : : #include <condition_variable>
111 : : #include <exception>
112 : : #include <thread>
113 : : // #include <regex> // on rhel7 gcc 4.8, not competent
114 : : #include <regex.h>
115 : : // #include <algorithm>
116 : : using namespace std;
117 : :
118 : : #include <gelf.h>
119 : : #include <libdwelf.h>
120 : :
121 : : #include <microhttpd.h>
122 : :
123 : : #if MHD_VERSION >= 0x00097002
124 : : // libmicrohttpd 0.9.71 broke API
125 : : #define MHD_RESULT enum MHD_Result
126 : : #else
127 : : #define MHD_RESULT int
128 : : #endif
129 : :
130 : : #ifdef ENABLE_IMA_VERIFICATION
131 : : #include <rpm/rpmlib.h>
132 : : #include <rpm/rpmfi.h>
133 : : #include <rpm/header.h>
134 : : #include <glob.h>
135 : : #endif
136 : :
137 : : #include <curl/curl.h>
138 : : #include <archive.h>
139 : : #include <archive_entry.h>
140 : : #include <sqlite3.h>
141 : :
142 : : #ifdef __linux__
143 : : #include <sys/syscall.h>
144 : : #endif
145 : :
146 : : #ifdef __linux__
147 : : #define tid() syscall(SYS_gettid)
148 : : #else
149 : : #define tid() pthread_self()
150 : : #endif
151 : :
152 : : extern "C" {
153 : : #include "printversion.h"
154 : : #include "system.h"
155 : : }
156 : : #include <json-c/json.h>
157 : :
158 : :
// Report whether HAYSTACK finishes with the complete text of NEEDLE.
// An empty NEEDLE is a suffix of every string, including the empty one.
inline bool
string_endswith(const std::string& haystack, const std::string& needle)
{
  const std::string::size_type suffix_len = needle.size();

  // A haystack shorter than the needle cannot possibly end with it.
  if (haystack.size() < suffix_len)
    return false;

  // Compare just the trailing suffix_len characters against the needle.
  return haystack.compare(haystack.size() - suffix_len, suffix_len, needle) == 0;
}
166 : :
167 : :
168 : : // Roll this identifier for every sqlite schema incompatibility.
169 : : #define BUILDIDS "buildids10"
170 : :
171 : : #if SQLITE_VERSION_NUMBER >= 3008000
172 : : #define WITHOUT_ROWID "without rowid"
173 : : #else
174 : : #define WITHOUT_ROWID ""
175 : : #endif
176 : :
177 : : static const char DEBUGINFOD_SQLITE_DDL[] =
178 : : "pragma foreign_keys = on;\n"
179 : : "pragma synchronous = 0;\n" // disable fsync()s - this cache is disposable across a machine crash
180 : : "pragma journal_mode = wal;\n" // https://sqlite.org/wal.html
181 : : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
182 : : "pragma journal_size_limit = 0;\n" // limit steady state file (between grooming, which also =truncate's)
183 : : "pragma auto_vacuum = incremental;\n" // https://sqlite.org/pragma.html
184 : : "pragma busy_timeout = 1000;\n" // https://sqlite.org/pragma.html
185 : : // NB: all these are overridable with -D option
186 : :
187 : : // Normalization table for interning file names
188 : : "create table if not exists " BUILDIDS "_fileparts (\n"
189 : : " id integer primary key not null,\n"
190 : : " name text unique not null\n"
191 : : " );\n"
192 : : "create table if not exists " BUILDIDS "_files (\n"
193 : : " id integer primary key not null,\n"
194 : : " dirname integer not null,\n"
195 : : " basename integer not null,\n"
196 : : " unique (dirname, basename),\n"
197 : : " foreign key (dirname) references " BUILDIDS "_fileparts(id) on delete cascade,\n"
198 : : " foreign key (basename) references " BUILDIDS "_fileparts(id) on delete cascade\n"
199 : : " );\n"
200 : : "create view if not exists " BUILDIDS "_files_v as\n" // a
201 : : " select f.id, n1.name || '/' || n2.name as name\n"
202 : : " from " BUILDIDS "_files f, " BUILDIDS "_fileparts n1, " BUILDIDS "_fileparts n2\n"
203 : : " where f.dirname = n1.id and f.basename = n2.id;\n"
204 : :
205 : : // Normalization table for interning buildids
206 : : "create table if not exists " BUILDIDS "_buildids (\n"
207 : : " id integer primary key not null,\n"
208 : : " hex text unique not null);\n"
209 : : // Track the completion of scanning of a given file & sourcetype at given time
210 : : "create table if not exists " BUILDIDS "_file_mtime_scanned (\n"
211 : : " mtime integer not null,\n"
212 : : " file integer not null,\n"
213 : : " size integer not null,\n" // in bytes
214 : : " sourcetype text(1) not null\n"
215 : : " check (sourcetype IN ('F', 'R')),\n"
216 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
217 : : " primary key (file, mtime, sourcetype)\n"
218 : : " ) " WITHOUT_ROWID ";\n"
219 : : "create table if not exists " BUILDIDS "_f_de (\n"
220 : : " buildid integer not null,\n"
221 : : " debuginfo_p integer not null,\n"
222 : : " executable_p integer not null,\n"
223 : : " file integer not null,\n"
224 : : " mtime integer not null,\n"
225 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
226 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
227 : : " primary key (buildid, file, mtime)\n"
228 : : " ) " WITHOUT_ROWID ";\n"
229 : : // Index for faster delete by file identifier and metadata searches
230 : : "create index if not exists " BUILDIDS "_f_de_idx on " BUILDIDS "_f_de (file, mtime);\n"
231 : : "create table if not exists " BUILDIDS "_f_s (\n"
232 : : " buildid integer not null,\n"
233 : : " artifactsrc integer not null,\n"
234 : : " file integer not null,\n" // NB: not necessarily entered into _mtime_scanned
235 : : " mtime integer not null,\n"
236 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
237 : : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
238 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
239 : : " primary key (buildid, artifactsrc, file, mtime)\n"
240 : : " ) " WITHOUT_ROWID ";\n"
241 : : "create table if not exists " BUILDIDS "_r_de (\n"
242 : : " buildid integer not null,\n"
243 : : " debuginfo_p integer not null,\n"
244 : : " executable_p integer not null,\n"
245 : : " file integer not null,\n"
246 : : " mtime integer not null,\n"
247 : : " content integer not null,\n"
248 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
249 : : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
250 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
251 : : " primary key (buildid, debuginfo_p, executable_p, file, content, mtime)\n"
252 : : " ) " WITHOUT_ROWID ";\n"
253 : : // Index for faster delete by archive file identifier
254 : : "create index if not exists " BUILDIDS "_r_de_idx on " BUILDIDS "_r_de (file, mtime);\n"
255 : : // Index for metadata searches
256 : : "create index if not exists " BUILDIDS "_r_de_idx2 on " BUILDIDS "_r_de (content);\n"
257 : : "create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
258 : : " buildid integer not null,\n"
259 : : " artifactsrc integer not null,\n"
260 : : " foreign key (artifactsrc) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
261 : : " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
262 : : " primary key (buildid, artifactsrc)\n"
263 : : " ) " WITHOUT_ROWID ";\n"
264 : : "create table if not exists " BUILDIDS "_r_sdef (\n" // rpm contents that may satisfy sref
265 : : " file integer not null,\n"
266 : : " mtime integer not null,\n"
267 : : " content integer not null,\n"
268 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
269 : : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
270 : : " primary key (content, file, mtime)\n"
271 : : " ) " WITHOUT_ROWID ";\n"
272 : : "create table if not exists " BUILDIDS "_r_seekable (\n" // seekable rpm contents
273 : : " file integer not null,\n"
274 : : " content integer not null,\n"
275 : : " type text not null,\n"
276 : : " size integer not null,\n"
277 : : " offset integer not null,\n"
278 : : " mtime integer not null,\n"
279 : : " foreign key (file) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
280 : : " foreign key (content) references " BUILDIDS "_files(id) on update cascade on delete cascade,\n"
281 : : " primary key (file, content)\n"
282 : : " ) " WITHOUT_ROWID ";\n"
283 : : // create views to glue together some of the above tables, for webapi D queries
284 : : // NB: _query_d2 and _query_e2 were added to replace _query_d and _query_e
285 : : // without updating BUILDIDS. They can be renamed back the next time BUILDIDS
286 : : // is updated.
287 : : "create view if not exists " BUILDIDS "_query_d2 as \n"
288 : : "select\n"
289 : : " b.hex as buildid, 'F' as sourcetype, n.file as id0, f0.name as source0, n.mtime as mtime, null as id1, null as source1\n"
290 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_f_de n\n"
291 : : " where b.id = n.buildid and f0.id = n.file and n.debuginfo_p = 1\n"
292 : : "union all select\n"
293 : : " b.hex as buildid, 'R' as sourcetype, n.file as id0, f0.name as source0, n.mtime as mtime, n.content as id1, f1.name as source1\n"
294 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_r_de n\n"
295 : : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.debuginfo_p = 1\n"
296 : : ";"
297 : : // ... and for E queries
298 : : "create view if not exists " BUILDIDS "_query_e2 as \n"
299 : : "select\n"
300 : : " b.hex as buildid, 'F' as sourcetype, n.file as id0, f0.name as source0, n.mtime as mtime, null as id1, null as source1\n"
301 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_f_de n\n"
302 : : " where b.id = n.buildid and f0.id = n.file and n.executable_p = 1\n"
303 : : "union all select\n"
304 : : " b.hex as buildid, 'R' as sourcetype, n.file as id0, f0.name as source0, n.mtime as mtime, n.content as id1, f1.name as source1\n"
305 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_r_de n\n"
306 : : " where b.id = n.buildid and f0.id = n.file and f1.id = n.content and n.executable_p = 1\n"
307 : : ";"
308 : : // ... and for S queries
309 : : "create view if not exists " BUILDIDS "_query_s as \n"
310 : : "select\n"
311 : : " b.hex as buildid, fs.name as artifactsrc, 'F' as sourcetype, f0.name as source0, n.mtime as mtime, null as source1, null as source0ref\n"
312 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v fs, " BUILDIDS "_f_s n\n"
313 : : " where b.id = n.buildid and f0.id = n.file and fs.id = n.artifactsrc\n"
314 : : "union all select\n"
315 : : " b.hex as buildid, f1.name as artifactsrc, 'R' as sourcetype, f0.name as source0, sd.mtime as mtime, f1.name as source1, fsref.name as source0ref\n"
316 : : " from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f0, " BUILDIDS "_files_v f1, " BUILDIDS "_files_v fsref, "
317 : : " " BUILDIDS "_r_sdef sd, " BUILDIDS "_r_sref sr, " BUILDIDS "_r_de sde\n"
318 : : " where b.id = sr.buildid and f0.id = sd.file and fsref.id = sde.file and f1.id = sd.content\n"
319 : : " and sr.artifactsrc = sd.content and sde.buildid = sr.buildid\n"
320 : : ";"
321 : : // and for startup overview counts
322 : : "drop view if exists " BUILDIDS "_stats;\n"
323 : : "create view if not exists " BUILDIDS "_stats as\n"
324 : : " select 'file d/e' as label,count(*) as quantity from " BUILDIDS "_f_de\n"
325 : : "union all select 'file s',count(*) from " BUILDIDS "_f_s\n"
326 : : "union all select 'archive d/e',count(*) from " BUILDIDS "_r_de\n"
327 : : "union all select 'archive sref',count(*) from " BUILDIDS "_r_sref\n"
328 : : "union all select 'archive sdef',count(*) from " BUILDIDS "_r_sdef\n"
329 : : "union all select 'buildids',count(*) from " BUILDIDS "_buildids\n"
330 : : "union all select 'filenames',count(*) from " BUILDIDS "_files\n"
331 : : "union all select 'fileparts',count(*) from " BUILDIDS "_fileparts\n"
332 : : "union all select 'files scanned (#)',count(*) from " BUILDIDS "_file_mtime_scanned\n"
333 : : "union all select 'files scanned (mb)',coalesce(sum(size)/1024/1024,0) from " BUILDIDS "_file_mtime_scanned\n"
334 : : #if SQLITE_VERSION_NUMBER >= 3016000
335 : : "union all select 'index db size (mb)',page_count*page_size/1024/1024 as size FROM pragma_page_count(), pragma_page_size()\n"
336 : : #endif
337 : : ";\n"
338 : :
339 : : // schema change history & garbage collection
340 : : //
341 : : // XXX: we could have migration queries here to bring prior-schema
342 : : // data over instead of just dropping it. But that could incur
343 : : // doubled storage costs.
344 : : //
345 : : // buildids10: split the _files table into _parts
346 : : "" // <<< we are here
347 : : // buildids9: widen the mtime_scanned table
348 : : "DROP VIEW IF EXISTS buildids9_stats;\n"
349 : : "DROP INDEX IF EXISTS buildids9_r_de_idx;\n"
350 : : "DROP INDEX IF EXISTS buildids9_f_de_idx;\n"
351 : : "DROP VIEW IF EXISTS buildids9_query_s;\n"
352 : : "DROP VIEW IF EXISTS buildids9_query_e;\n"
353 : : "DROP VIEW IF EXISTS buildids9_query_d;\n"
354 : : "DROP TABLE IF EXISTS buildids9_r_sdef;\n"
355 : : "DROP TABLE IF EXISTS buildids9_r_sref;\n"
356 : : "DROP TABLE IF EXISTS buildids9_r_de;\n"
357 : : "DROP TABLE IF EXISTS buildids9_f_s;\n"
358 : : "DROP TABLE IF EXISTS buildids9_f_de;\n"
359 : : "DROP TABLE IF EXISTS buildids9_file_mtime_scanned;\n"
360 : : "DROP TABLE IF EXISTS buildids9_buildids;\n"
361 : : "DROP TABLE IF EXISTS buildids9_files;\n"
362 : : // buildids8: slim the sref table
363 : : "drop table if exists buildids8_f_de;\n"
364 : : "drop table if exists buildids8_f_s;\n"
365 : : "drop table if exists buildids8_r_de;\n"
366 : : "drop table if exists buildids8_r_sref;\n"
367 : : "drop table if exists buildids8_r_sdef;\n"
368 : : "drop table if exists buildids8_file_mtime_scanned;\n"
369 : : "drop table if exists buildids8_files;\n"
370 : : "drop table if exists buildids8_buildids;\n"
371 : : // buildids7: separate _norm table into dense subtype tables
372 : : "drop table if exists buildids7_f_de;\n"
373 : : "drop table if exists buildids7_f_s;\n"
374 : : "drop table if exists buildids7_r_de;\n"
375 : : "drop table if exists buildids7_r_sref;\n"
376 : : "drop table if exists buildids7_r_sdef;\n"
377 : : "drop table if exists buildids7_file_mtime_scanned;\n"
378 : : "drop table if exists buildids7_files;\n"
379 : : "drop table if exists buildids7_buildids;\n"
380 : : // buildids6: drop bolo/rfolo again, represent sources / rpmcontents in main table
381 : : "drop table if exists buildids6_norm;\n"
382 : : "drop table if exists buildids6_files;\n"
383 : : "drop table if exists buildids6_buildids;\n"
384 : : "drop view if exists buildids6;\n"
385 : : // buildids5: redefine srcfile1 column to be '.'-less (for rpms)
386 : : "drop table if exists buildids5_norm;\n"
387 : : "drop table if exists buildids5_files;\n"
388 : : "drop table if exists buildids5_buildids;\n"
389 : : "drop table if exists buildids5_bolo;\n"
390 : : "drop table if exists buildids5_rfolo;\n"
391 : : "drop view if exists buildids5;\n"
392 : : // buildids4: introduce rpmfile RFOLO
393 : : "drop table if exists buildids4_norm;\n"
394 : : "drop table if exists buildids4_files;\n"
395 : : "drop table if exists buildids4_buildids;\n"
396 : : "drop table if exists buildids4_bolo;\n"
397 : : "drop table if exists buildids4_rfolo;\n"
398 : : "drop view if exists buildids4;\n"
399 : : // buildids3*: split out srcfile BOLO
400 : : "drop table if exists buildids3_norm;\n"
401 : : "drop table if exists buildids3_files;\n"
402 : : "drop table if exists buildids3_buildids;\n"
403 : : "drop table if exists buildids3_bolo;\n"
404 : : "drop view if exists buildids3;\n"
405 : : // buildids2: normalized buildid and filenames into interning tables;
406 : : "drop table if exists buildids2_norm;\n"
407 : : "drop table if exists buildids2_files;\n"
408 : : "drop table if exists buildids2_buildids;\n"
409 : : "drop view if exists buildids2;\n"
410 : : // buildids1: made buildid and artifacttype NULLable, to represent cached-negative
411 : : // lookups from sources, e.g. files or rpms that contain no buildid-indexable content
412 : : "drop table if exists buildids1;\n"
413 : : // buildids: original
414 : : "drop table if exists buildids;\n"
415 : : ;
416 : :
417 : : static const char DEBUGINFOD_SQLITE_CLEANUP_DDL[] =
418 : : "pragma wal_checkpoint = truncate;\n" // clean out any preexisting wal file
419 : : ;
420 : :
421 : :
422 : :
423 : :
424 : : /* Name and version of program. */
425 : : ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
426 : :
427 : : /* Bug report address. */
428 : : ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
429 : :
430 : : /* Definitions of arguments for argp functions. */
431 : : static const struct argp_option options[] =
432 : : {
433 : : { NULL, 0, NULL, 0, "Scanners:", 1 },
434 : : { "scan-file-dir", 'F', NULL, 0, "Enable ELF/DWARF file scanning.", 0 },
435 : : { "scan-rpm-dir", 'R', NULL, 0, "Enable RPM scanning.", 0 },
436 : : { "scan-deb-dir", 'U', NULL, 0, "Enable DEB scanning.", 0 },
437 : : { "scan-archive", 'Z', "EXT=CMD", 0, "Enable arbitrary archive scanning.", 0 },
438 : : // "source-oci-imageregistry" ...
439 : :
440 : : { NULL, 0, NULL, 0, "Options:", 2 },
441 : : { "logical", 'L', NULL, 0, "Follow symlinks, default=ignore.", 0 },
442 : : { "rescan-time", 't', "SECONDS", 0, "Number of seconds to wait between rescans, 0=disable.", 0 },
443 : : { "groom-time", 'g', "SECONDS", 0, "Number of seconds to wait between database grooming, 0=disable.", 0 },
444 : : { "maxigroom", 'G', NULL, 0, "Run a complete database groom/shrink pass at startup.", 0 },
445 : : { "concurrency", 'c', "NUM", 0, "Limit scanning thread concurrency to NUM, default=#CPUs.", 0 },
446 : : { "connection-pool", 'C', "NUM", OPTION_ARG_OPTIONAL,
447 : : "Use webapi connection pool with NUM threads, default=unlim.", 0 },
448 : : { "include", 'I', "REGEX", 0, "Include files matching REGEX, default=all.", 0 },
449 : : { "exclude", 'X', "REGEX", 0, "Exclude files matching REGEX, default=none.", 0 },
450 : : { "port", 'p', "NUM", 0, "HTTP port to listen on, default 8002.", 0 },
451 : : { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
452 : : { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
453 : : { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
454 : : { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X arguments to groom the database.",0},
455 : : #define ARGP_KEY_FDCACHE_FDS 0x1001
456 : : { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", OPTION_HIDDEN, NULL, 0 },
457 : : #define ARGP_KEY_FDCACHE_MBS 0x1002
458 : : { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
459 : : #define ARGP_KEY_FDCACHE_PREFETCH 0x1003
460 : : { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
461 : : #define ARGP_KEY_FDCACHE_MINTMP 0x1004
462 : : { "fdcache-mintmp", ARGP_KEY_FDCACHE_MINTMP, "NUM", 0, "Minimum free space% on tmpdir.", 0 },
463 : : #define ARGP_KEY_FDCACHE_PREFETCH_MBS 0x1005
464 : : { "fdcache-prefetch-mbs", ARGP_KEY_FDCACHE_PREFETCH_MBS, "MB", OPTION_HIDDEN, NULL, 0},
465 : : #define ARGP_KEY_FDCACHE_PREFETCH_FDS 0x1006
466 : : { "fdcache-prefetch-fds", ARGP_KEY_FDCACHE_PREFETCH_FDS, "NUM", OPTION_HIDDEN, NULL, 0},
467 : : #define ARGP_KEY_FORWARDED_TTL_LIMIT 0x1007
468 : : {"forwarded-ttl-limit", ARGP_KEY_FORWARDED_TTL_LIMIT, "NUM", 0, "Limit of X-Forwarded-For hops, default 8.", 0},
469 : : #define ARGP_KEY_PASSIVE 0x1008
470 : : { "passive", ARGP_KEY_PASSIVE, NULL, 0, "Do not scan or groom, read-only database.", 0 },
471 : : #define ARGP_KEY_DISABLE_SOURCE_SCAN 0x1009
472 : : { "disable-source-scan", ARGP_KEY_DISABLE_SOURCE_SCAN, NULL, 0, "Do not scan dwarf source info.", 0 },
473 : : #define ARGP_SCAN_CHECKPOINT 0x100A
474 : : { "scan-checkpoint", ARGP_SCAN_CHECKPOINT, "NUM", 0, "Number of files scanned before a WAL checkpoint.", 0 },
475 : : #ifdef ENABLE_IMA_VERIFICATION
476 : : #define ARGP_KEY_KOJI_SIGCACHE 0x100B
477 : : { "koji-sigcache", ARGP_KEY_KOJI_SIGCACHE, NULL, 0, "Do a koji specific mapping of rpm paths to get IMA signatures.", 0 },
478 : : #endif
479 : : #define ARGP_KEY_METADATA_MAXTIME 0x100C
480 : : { "metadata-maxtime", ARGP_KEY_METADATA_MAXTIME, "SECONDS", 0,
481 : : "Number of seconds to limit metadata query run time, 0=unlimited.", 0 },
482 : : { NULL, 0, NULL, 0, NULL, 0 },
483 : : };
484 : :
485 : : /* Short description of program. */
486 : : static const char doc[] = "Serve debuginfo-related content across HTTP from files under PATHs.";
487 : :
488 : : /* Strings for arguments in help texts. */
489 : : static const char args_doc[] = "[PATH ...]";
490 : :
491 : : /* Prototype for option handler. */
492 : : static error_t parse_opt (int key, char *arg, struct argp_state *state);
493 : :
494 : : static unsigned default_concurrency();
495 : :
496 : : /* Data structure to communicate with argp functions. */
497 : : static struct argp argp =
498 : : {
499 : : options, parse_opt, args_doc, doc, NULL, NULL, NULL
500 : : };
501 : :
502 : :
// Process-wide configuration and state.  Most of these are filled in by
// parse_opt() from the command line; the option letter / long name that
// sets each one is noted where the visible code shows it.

// -d FILE: sqlite database path (":memory:" is rewritten to a shared-cache URI).
503 : : static string db_path;
504 : : static sqlite3 *db; // single connection, serialized across all our threads!
505 : : static sqlite3 *dbq; // webapi query-servicing readonly connection, serialized ditto!
// -v: incremented once per occurrence.
506 : : static unsigned verbose;
// NOTE(review): these look like flags/counters shared with signal handlers
// (volatile sig_atomic_t); the handlers are not visible here - confirm.
507 : : static volatile sig_atomic_t interrupted = 0;
508 : : static volatile sig_atomic_t forced_rescan_count = 0;
509 : : static volatile sig_atomic_t sigusr1 = 0;
510 : : static volatile sig_atomic_t forced_groom_count = 0;
511 : : static volatile sig_atomic_t sigusr2 = 0;
// -p NUM: HTTP port; parse_opt rejects 0 and values above 65535.
512 : : static unsigned http_port = 8002;
// -t SECONDS: wait between rescans, 0=disable.
513 : : static unsigned rescan_s = 300;
// -g SECONDS: wait between database grooming passes, 0=disable.
514 : : static unsigned groom_s = 86400;
// -G: run a complete database groom/shrink pass at startup.
515 : : static bool maxigroom = false;
// -c NUM: scanning thread concurrency; parse_opt raises values below 1 to 1.
516 : : static unsigned concurrency = default_concurrency();
// -C [NUM]: webapi connection pool size; when NUM is given it must be >= 2.
517 : : static int connection_pool = 0;
// Positional PATH arguments.
518 : : static set<string> source_paths;
// -F: enable ELF/DWARF file scanning.
519 : : static bool scan_files = false;
// -R / -U / -Z EXT=CMD: archive file extension -> command string ("cat" by default).
520 : : static map<string,string> scan_archives;
// -D SQL: extra sqlite ddl/pragma statements, in command-line order.
521 : : static vector<string> extra_ddl;
// -I REGEX / -X REGEX: compiled with REG_EXTENDED|REG_NOSUB.
522 : : static regex_t file_include_regex;
523 : : static regex_t file_exclude_regex;
// -r: use the -I / -X regexes to groom the database.
524 : : static bool regex_groom = false;
// -L: follow symlinks.
525 : : static bool traverse_logical;
// --fdcache-mbs / --fdcache-prefetch / --fdcache-mintmp (the latter is a
// percentage, checked to lie within 0..100).
526 : : static long fdcache_mbs;
527 : : static long fdcache_prefetch;
528 : : static long fdcache_mintmp;
// --forwarded-ttl-limit NUM: limit of X-Forwarded-For hops.
529 : : static unsigned forwarded_ttl_limit = 8;
// Cleared by --disable-source-scan.
530 : : static bool scan_source_info = true;
// NOTE(review): not assigned anywhere in the visible part of the file.
531 : : static string tmpdir;
// --passive: do not scan or groom, read-only database.
532 : : static bool passive_p = false;
// --scan-checkpoint NUM: files scanned before a WAL checkpoint; must be >= 0.
533 : : static long scan_checkpoint = 256;
534 : : #ifdef ENABLE_IMA_VERIFICATION
// --koji-sigcache: do a koji specific mapping of rpm paths to get IMA signatures.
535 : : static bool requires_koji_sigcache_mapping = false;
536 : : #endif
// --metadata-maxtime SECONDS: metadata query run time limit, 0=unlimited.
537 : : static unsigned metadata_maxtime_s = 5;
538 : :
539 : : static void set_metric(const string& key, double value);
540 : : static void inc_metric(const string& key);
541 : : static void add_metric(const string& metric,
542 : : double value);
543 : : static void set_metric(const string& metric,
544 : : const string& lname, const string& lvalue,
545 : : double value);
546 : : static void inc_metric(const string& metric,
547 : : const string& lname, const string& lvalue);
548 : : static void add_metric(const string& metric,
549 : : const string& lname, const string& lvalue,
550 : : double value);
551 : : static void inc_metric(const string& metric,
552 : : const string& lname, const string& lvalue,
553 : : const string& rname, const string& rvalue);
554 : : static void add_metric(const string& metric,
555 : : const string& lname, const string& lvalue,
556 : : const string& rname, const string& rvalue,
557 : : double value);
558 : :
559 : :
560 : : class tmp_inc_metric { // a RAII style wrapper for exception-safe scoped increment & decrement
561 : : string m, n, v;
562 : : public:
563 : 3341 : tmp_inc_metric(const string& mname, const string& lname, const string& lvalue):
564 [ + - + - ]: 3341 : m(mname), n(lname), v(lvalue)
565 : : {
566 [ + - ]: 3341 : add_metric (m, n, v, 1);
567 [ - - - - : 3341 : }
- - ]
568 : 3341 : ~tmp_inc_metric()
569 : : {
570 : 3341 : add_metric (m, n, v, -1);
571 [ - + - + : 3341 : }
- + ]
572 : : };
573 : :
574 : : class tmp_ms_metric { // a RAII style wrapper for exception-safe scoped timing
575 : : string m, n, v;
576 : : struct timespec ts_start;
577 : : public:
578 : 346034 : tmp_ms_metric(const string& mname, const string& lname, const string& lvalue):
579 [ + - + - ]: 346034 : m(mname), n(lname), v(lvalue)
580 : : {
581 : 346023 : clock_gettime (CLOCK_MONOTONIC, & ts_start);
582 [ - - - - ]: 346247 : }
583 : 346292 : ~tmp_ms_metric()
584 : : {
585 : 346292 : struct timespec ts_end;
586 : 346292 : clock_gettime (CLOCK_MONOTONIC, & ts_end);
587 : 346280 : double deltas = (ts_end.tv_sec - ts_start.tv_sec)
588 : 346280 : + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
589 : :
590 [ + - ]: 346280 : add_metric (m + "_milliseconds_sum", n, v, (deltas*1000.0));
591 [ + + ]: 346306 : inc_metric (m + "_milliseconds_count", n, v);
592 [ + + - + : 638994 : }
- + ]
593 : : };
594 : :
595 : :
596 : : /* Handle program arguments. */
597 : : static error_t
598 : 1226 : parse_opt (int key, char *arg,
599 : : struct argp_state *state __attribute__ ((unused)))
600 : : {
601 : 1226 : int rc;
602 [ + + + + : 1226 : switch (key)
+ + + + +
- + + - -
+ + + + +
+ + + + +
- + - + ]
603 : : {
604 : 296 : case 'v': verbose ++; break;
605 : 80 : case 'd':
606 : : /* When using the in-memory database make sure it is shareable,
607 : : so we can open it twice as read/write and read-only. */
608 [ + + ]: 80 : if (strcmp (arg, ":memory:") == 0)
609 : 1240 : db_path = "file::memory:?cache=shared";
610 : : else
611 [ + - ]: 132 : db_path = string(arg);
612 : : break;
613 : 80 : case 'p': http_port = (unsigned) atoi(arg);
614 [ + - ]: 80 : if (http_port == 0 || http_port > 65535)
615 : 0 : argp_failure(state, 1, EINVAL, "port number");
616 : : break;
617 : 50 : case 'F': scan_files = true; break;
618 : 24 : case 'R':
619 [ + - + - : 24 : scan_archives[".rpm"]="cat"; // libarchive groks rpm natively
- + ]
620 : 24 : break;
621 : 18 : case 'U':
622 [ + - + - : 18 : scan_archives[".deb"]="(bsdtar -O -x -f - data.tar\\*)<";
- + ]
623 [ + - + - : 18 : scan_archives[".ddeb"]="(bsdtar -O -x -f - data.tar\\*)<";
- + ]
624 [ + - + - : 18 : scan_archives[".ipk"]="(bsdtar -O -x -f - data.tar\\*)<";
- + ]
625 : : // .udeb too?
626 : 18 : break;
627 : 40 : case 'Z':
628 : 40 : {
629 [ - + ]: 40 : char* extension = strchr(arg, '=');
630 [ - + ]: 40 : if (arg[0] == '\0')
631 : 0 : argp_failure(state, 1, EINVAL, "missing EXT");
632 [ + + ]: 40 : else if (extension)
633 [ + - + - : 20 : scan_archives[string(arg, (extension-arg))]=string(extension+1);
- + - + -
- ]
634 : : else
635 [ + - + - : 20 : scan_archives[string(arg)]=string("cat");
- + - + -
- ]
636 : : }
637 : : break;
638 : 8 : case 'L':
639 [ - + ]: 8 : if (passive_p)
640 : 0 : argp_failure(state, 1, EINVAL, "-L option inconsistent with passive mode");
641 : 8 : traverse_logical = true;
642 : 8 : break;
643 : 0 : case 'D':
644 [ # # ]: 0 : if (passive_p)
645 : 0 : argp_failure(state, 1, EINVAL, "-D option inconsistent with passive mode");
646 [ # # ]: 0 : extra_ddl.push_back(string(arg));
647 : 0 : break;
648 : 64 : case 't':
649 [ - + ]: 64 : if (passive_p)
650 : 0 : argp_failure(state, 1, EINVAL, "-t option inconsistent with passive mode");
651 : 64 : rescan_s = (unsigned) atoi(arg);
652 : 64 : break;
653 : 64 : case 'g':
654 [ - + ]: 64 : if (passive_p)
655 : 0 : argp_failure(state, 1, EINVAL, "-g option inconsistent with passive mode");
656 : 64 : groom_s = (unsigned) atoi(arg);
657 : 64 : break;
658 : 0 : case 'G':
659 [ # # ]: 0 : if (passive_p)
660 : 0 : argp_failure(state, 1, EINVAL, "-G option inconsistent with passive mode");
661 : 0 : maxigroom = true;
662 : 0 : break;
663 : 0 : case 'c':
664 [ # # ]: 0 : if (passive_p)
665 : 0 : argp_failure(state, 1, EINVAL, "-c option inconsistent with passive mode");
666 : 0 : concurrency = (unsigned) atoi(arg);
667 [ # # ]: 0 : if (concurrency < 1) concurrency = 1;
668 : : break;
669 : 6 : case 'C':
670 [ + + ]: 6 : if (arg)
671 : : {
672 : 4 : connection_pool = atoi(arg);
673 [ + - ]: 4 : if (connection_pool < 2)
674 : 0 : argp_failure(state, 1, EINVAL, "-C NUM minimum 2");
675 : : }
676 : : break;
677 : 4 : case 'I':
678 : : // NB: no problem with unconditional free here - an earlier failed regcomp would exit program
679 [ - + ]: 4 : if (passive_p)
680 : 0 : argp_failure(state, 1, EINVAL, "-I option inconsistent with passive mode");
681 : 4 : regfree (&file_include_regex);
682 : 4 : rc = regcomp (&file_include_regex, arg, REG_EXTENDED|REG_NOSUB);
683 [ + - ]: 4 : if (rc != 0)
684 : 0 : argp_failure(state, 1, EINVAL, "regular expression");
685 : : break;
686 : 6 : case 'X':
687 [ - + ]: 6 : if (passive_p)
688 : 0 : argp_failure(state, 1, EINVAL, "-X option inconsistent with passive mode");
689 : 6 : regfree (&file_exclude_regex);
690 : 6 : rc = regcomp (&file_exclude_regex, arg, REG_EXTENDED|REG_NOSUB);
691 [ + - ]: 6 : if (rc != 0)
692 : 0 : argp_failure(state, 1, EINVAL, "regular expression");
693 : : break;
694 : 4 : case 'r':
695 [ - + ]: 4 : if (passive_p)
696 : 0 : argp_failure(state, 1, EINVAL, "-r option inconsistent with passive mode");
697 : 4 : regex_groom = true;
698 : 4 : break;
699 : : case ARGP_KEY_FDCACHE_FDS:
700 : : // deprecated
701 : : break;
702 : 4 : case ARGP_KEY_FDCACHE_MBS:
703 : 4 : fdcache_mbs = atol (arg);
704 : 4 : break;
705 : 4 : case ARGP_KEY_FDCACHE_PREFETCH:
706 : 4 : fdcache_prefetch = atol (arg);
707 : 4 : break;
708 : 4 : case ARGP_KEY_FDCACHE_MINTMP:
709 : 4 : fdcache_mintmp = atol (arg);
710 [ + - ]: 4 : if( fdcache_mintmp > 100 || fdcache_mintmp < 0 )
711 : 0 : argp_failure(state, 1, EINVAL, "fdcache mintmp percent");
712 : : break;
713 : 4 : case ARGP_KEY_FORWARDED_TTL_LIMIT:
714 : 4 : forwarded_ttl_limit = (unsigned) atoi(arg);
715 : 4 : break;
716 : 110 : case ARGP_KEY_ARG:
717 [ + - ]: 110 : source_paths.insert(string(arg));
718 : 110 : break;
719 : : case ARGP_KEY_FDCACHE_PREFETCH_FDS:
720 : : // deprecated
721 : : break;
722 : : case ARGP_KEY_FDCACHE_PREFETCH_MBS:
723 : : // deprecated
724 : : break;
725 : 2 : case ARGP_KEY_PASSIVE:
726 : 2 : passive_p = true;
727 [ + - ]: 2 : if (source_paths.size() > 0
728 [ + - ]: 2 : || maxigroom
729 [ + - ]: 2 : || extra_ddl.size() > 0
730 [ + - + - ]: 4 : || traverse_logical)
731 : : // other conflicting options tricky to check
732 : 0 : argp_failure(state, 1, EINVAL, "inconsistent options with passive mode");
733 : : break;
734 : 0 : case ARGP_KEY_DISABLE_SOURCE_SCAN:
735 : 0 : scan_source_info = false;
736 : 0 : break;
737 : 2 : case ARGP_SCAN_CHECKPOINT:
738 : 2 : scan_checkpoint = atol (arg);
739 [ + - ]: 2 : if (scan_checkpoint < 0)
740 : 0 : argp_failure(state, 1, EINVAL, "scan checkpoint");
741 : : break;
742 : 0 : case ARGP_KEY_METADATA_MAXTIME:
743 : 0 : metadata_maxtime_s = (unsigned) atoi(arg);
744 : 0 : break;
745 : : #ifdef ENABLE_IMA_VERIFICATION
746 : : case ARGP_KEY_KOJI_SIGCACHE:
747 : : requires_koji_sigcache_mapping = true;
748 : : break;
749 : : #endif
750 : : // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
751 : : default: return ARGP_ERR_UNKNOWN;
752 : : }
753 : :
754 : : return 0;
755 : : }
756 : :
757 : :
758 : : ////////////////////////////////////////////////////////////////////////
759 : :
760 : :
761 : : static void add_mhd_response_header (struct MHD_Response *r,
762 : : const char *h, const char *v);
763 : :
764 : : // represent errors that may get reported to an ostream and/or a libmicrohttpd connection
765 : :
766 : 8 : struct reportable_exception
767 : : {
768 : : int code;
769 : : string message;
770 : :
771 [ - - + - : 106 : reportable_exception(int c, const string& m): code(c), message(m) {}
- - + - +
- ]
772 [ - - - - : 604 : reportable_exception(const string& m): code(503), message(m) {}
- - - - -
- - - + -
- - - - +
- - - - -
- - - - -
- - - - -
- - - - -
- - - + -
- - ]
773 : : reportable_exception(): code(503), message() {}
774 : :
775 : : void report(ostream& o) const; // defined under obatched() class below
776 : :
777 : 634 : MHD_RESULT mhd_send_response(MHD_Connection* c) const {
778 : 1268 : MHD_Response* r = MHD_create_response_from_buffer (message.size(),
779 : 634 : (void*) message.c_str(),
780 : : MHD_RESPMEM_MUST_COPY);
781 : 634 : add_mhd_response_header (r, "Content-Type", "text/plain");
782 : 634 : MHD_RESULT rc = MHD_queue_response (c, code, r);
783 : 634 : MHD_destroy_response (r);
784 : 634 : return rc;
785 : : }
786 : : };
787 : :
788 : :
789 : : struct sqlite_exception: public reportable_exception
790 : : {
791 : 0 : sqlite_exception(int rc, const string& msg):
792 [ # # # # : 0 : reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) {
# # # # #
# # # # #
# # # # #
# ]
793 [ # # # # : 0 : inc_metric("error_count","sqlite3",sqlite3_errstr(rc));
# # # # #
# # # # #
# # # # ]
794 [ # # ]: 0 : }
795 : : };
796 : :
797 [ + - - - ]: 4 : struct libc_exception: public reportable_exception
798 : : {
799 : 594 : libc_exception(int rc, const string& msg):
800 [ - + + - : 2376 : reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
+ - + - +
- - + - +
- + + - -
- ]
801 [ + - + - : 1188 : inc_metric("error_count","libc",strerror(rc));
+ - + - -
+ + - - -
- - ]
802 [ - - ]: 594 : }
803 : : };
804 : :
805 : :
806 : : struct archive_exception: public reportable_exception
807 : : {
808 : 0 : archive_exception(const string& msg):
809 [ # # # # : 0 : reportable_exception(string("libarchive error: ") + msg) {
# # ]
810 [ # # # # : 0 : inc_metric("error_count","libarchive",msg);
# # # # #
# ]
811 [ # # ]: 0 : }
812 : 0 : archive_exception(struct archive* a, const string& msg):
813 [ # # # # : 0 : reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
# # # # #
# # # # #
# # # # #
# ]
814 [ # # # # : 0 : inc_metric("error_count","libarchive",msg + ": " + string(archive_error_string(a) ?: "?"));
# # # # #
# # # # #
# # # # #
# # # # #
# # # # #
# ]
815 [ # # ]: 0 : }
816 : : };
817 : :
818 : :
819 : : struct elfutils_exception: public reportable_exception
820 : : {
821 : 0 : elfutils_exception(int rc, const string& msg):
822 [ # # # # : 0 : reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
# # # # #
# # # # #
# # # # #
# ]
823 [ # # # # : 0 : inc_metric("error_count","elfutils",elf_errmsg(rc));
# # # # #
# # # # #
# # # # ]
824 [ # # ]: 0 : }
825 : : };
826 : :
827 : :
828 : : ////////////////////////////////////////////////////////////////////////
829 : :
// A duplicate-eliminating work queue shared by three thread roles:
// producers (push_back), consumers ("fronters": wait_front/done_front)
// and "idlers" (wait_idle/done_idle), which only proceed while the queue
// is empty and no consumer is still processing an item.
template <typename Payload>
class workq
{
  unordered_set<Payload> q; // eliminate duplicates
  mutex mtx;                // taken by every member function below
  condition_variable cv;
  bool dead;                // set by nuke(); releases all waiters
  unsigned idlers;   // number of threads busy with wait_idle / done_idle
  unsigned fronters; // number of threads busy with wait_front / done_front

public:
  workq(): dead(false), idlers(0), fronters(0) {}
  ~workq() {}

  // Add one item (a no-op for the set if an equal item is already
  // queued) and wake every waiter.
  void push_back(const Payload& p)
  {
    unique_lock<mutex> guard(mtx);
    q.insert (p);
    set_metric("thread_work_pending","role","scan", q.size());
    cv.notify_all();
  }

  // kill this workqueue, wake up all idlers / scanners
  void nuke()
  {
    unique_lock<mutex> guard(mtx);
    // optional: q.clear();
    dead = true;
    cv.notify_all();
  }

  // clear the workqueue, when scanning is interrupted with USR2
  void clear()
  {
    unique_lock<mutex> guard(mtx);
    q.clear();
    set_metric("thread_work_pending","role","scan", q.size());
    // NB: there may still be some live fronters
    cv.notify_all(); // maybe wake up waiting idlers
  }

  // Block this scanner thread until there is work to do and no active
  // idler.  Returns false if the queue was nuked; otherwise moves one
  // item into P and returns true.
  bool wait_front (Payload& p)
  {
    unique_lock<mutex> guard(mtx);
    cv.wait(guard, [this] { return dead || (q.size() != 0 && idlers == 0); });
    if (dead)
      return false;

    auto first = q.begin();
    p = *first;
    q.erase (first);
    ++fronters; // prevent idlers from starting awhile, even if empty q
    set_metric("thread_work_pending","role","scan", q.size());
    // NB: don't wake up idlers yet!  The consumer is busy
    // processing this element until it calls done_front().
    return true;
  }

  // notify the workq that the scanner thread is done with that last item
  void done_front ()
  {
    unique_lock<mutex> guard(mtx);
    --fronters;
    if (q.size() == 0 && fronters == 0)
      cv.notify_all(); // maybe wake up waiting idlers
  }

  // block this idler thread until there is no work to do
  void wait_idle ()
  {
    unique_lock<mutex> guard(mtx);
    cv.notify_all(); // maybe wake up waiting scanners
    cv.wait(guard, [this] { return dead || (q.size() == 0 && fronters == 0); });
    ++idlers;
  }

  // Counterpart of wait_idle(): this idler thread is finished.
  void done_idle ()
  {
    unique_lock<mutex> guard(mtx);
    --idlers;
    cv.notify_all(); // maybe wake up waiting scanners, but probably not (shutting down)
  }
};
915 : :
// A scan work item: a file name paired with its stat data.
typedef struct stat stat_t;
typedef pair<string,stat_t> scan_payload;

// Order payloads by file name only.
inline bool operator< (const scan_payload& a, const scan_payload& b)
{
  return a.first < b.first; // don't bother compare the stat fields
}

// Hashing and equality for scan_payload also look at the file name
// only, so two payloads for the same path count as duplicates whatever
// their stat fields hold.
namespace std { // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56480
  template<> struct hash<::scan_payload>
  {
    std::size_t operator() (const ::scan_payload& p) const noexcept
    {
      return hash<string>()(p.first);
    }
  };
  template<> struct equal_to<::scan_payload>
  {
    // An equality predicate yields bool (this was declared as std::size_t).
    bool operator() (const ::scan_payload& a, const ::scan_payload& b) const noexcept
    {
      return a.first == b.first;
    }
  };
}
939 : :
940 : : static workq<scan_payload> scanq; // just a single one
941 : : // producer & idler: thread_main_fts_source_paths()
942 : : // consumer: thread_main_scanner()
943 : : // idler: thread_main_groom()
944 : :
945 : :
946 : : ////////////////////////////////////////////////////////////////////////
947 : :
948 : : // Unique set is a thread-safe structure that lends 'ownership' of a value
949 : : // to a thread. Other threads requesting the same thing are made to wait.
950 : : // It's like a semaphore-on-demand.
template <typename T>
class unique_set
{
private:
  set<T> values;          // the values currently held by some caller
  mutex mtx;              // protects values
  condition_variable cv;  // signalled on every release()
public:
  unique_set() {}
  ~unique_set() {}

  // Block until VALUE is not held by anyone, then record it as held.
  void acquire(const T& value)
  {
    unique_lock<mutex> guard(mtx);
    cv.wait(guard, [this, &value] { return values.count(value) == 0; });
    values.insert(value);
  }

  // Give VALUE back and wake every thread blocked in acquire().
  void release(const T& value)
  {
    unique_lock<mutex> guard(mtx);
    // assert (values.find(value) != values.end());
    values.erase(value);
    cv.notify_all();
  }
};
978 : :
979 : :
980 : : // This is the object that's instantiated to uniquely hold a value in a
981 : : // RAII-pattern way.
982 : : template <typename T>
983 : : class unique_set_reserver
984 : : {
985 : : private:
986 : : unique_set<T>& please_hold;
987 : : T mine;
988 : : public:
989 : 2572 : unique_set_reserver(unique_set<T>& t, const T& value):
990 [ + - - - ]: 2572 : please_hold(t), mine(value) { please_hold.acquire(mine); }
991 [ + - ]: 2572 : ~unique_set_reserver() { please_hold.release(mine); }
992 : : };
993 : :
994 : :
995 : : ////////////////////////////////////////////////////////////////////////
996 : :
997 : : // periodic_barrier is a concurrency control object that lets N threads
998 : : // periodically (based on counter value) agree to wait at a barrier,
999 : : // let one of them carry out some work, then be set free
1000 : :
class periodic_barrier
{
private:
  unsigned period;     // number of count() reports to trigger barrier activation
  unsigned threads;    // number of threads participating
  mutex mtx;           // protects all the following fields
  unsigned counter;    // count of count() reports in the current generation
  unsigned generation; // barrier activation generation
  unsigned waiting;    // number of threads waiting for barrier
  bool dead;           // set by nuke(); ends every wait in count()
  condition_variable cv;
public:
  periodic_barrier(unsigned t, unsigned p):
    period(p), threads(t), counter(0), generation(0), waiting(0), dead(false) { }
  virtual ~periodic_barrier() {}

  // The work performed by one thread per period; supplied by subclasses.
  // It is called with the barrier mutex held.
  virtual void periodic_barrier_work() noexcept = 0;

  // Mark the barrier dead and wake every thread blocked in count().
  void nuke()
  {
    unique_lock<mutex> guard(mtx);
    dead = true;
    cv.notify_all();
  }

  // Report one unit of progress.  Most calls only bump the counter.  The
  // call that completes a period becomes the "doer": it waits for the
  // other threads to arrive, runs periodic_barrier_work(), then starts a
  // new generation.  Calls that arrive in between wait for that.
  void count()
  {
    unique_lock<mutex> guard(mtx);
    const unsigned entry_generation = generation;
    const unsigned last_free_tick = period - 1;

    if (counter < last_free_tick) // normal case: counter just freely running
      {
        ++counter;
        return;
      }

    if (counter == last_free_tick) // we're the doer
      {
        counter = period; // entering barrier holding phase
        cv.notify_all();
        cv.wait(guard, [this] { return dead || waiting >= threads-1; });
        // all other threads are now stuck in the barrier
        periodic_barrier_work(); // NB: we're holding the mutex the whole time
        // reset for next barrier, releasing other waiters
        counter = 0;
        ++generation;
        cv.notify_all();
        return;
      }

    if (counter == period) // we're a waiter, in holding phase
      {
        ++waiting;
        cv.notify_all();
        cv.wait(guard, [this, entry_generation]
                { return dead || counter != period || generation != entry_generation; });
        --waiting;
      }
  }
};
1058 : :
1059 : :
1060 : :
1061 : : ////////////////////////////////////////////////////////////////////////
1062 : :
1063 : :
1064 : : // Print a standard timestamp.
1065 : : static ostream&
1066 : 44556 : timestamp (ostream &o)
1067 : : {
1068 : 44556 : char datebuf[80];
1069 : 44556 : char *now2 = NULL;
1070 : 44556 : time_t now_t = time(NULL);
1071 : 44560 : struct tm now;
1072 : 44560 : struct tm *nowp = gmtime_r (&now_t, &now);
1073 [ + - ]: 44565 : if (nowp)
1074 : : {
1075 : 44565 : (void) strftime (datebuf, sizeof (datebuf), "%c", nowp);
1076 : 44565 : now2 = datebuf;
1077 : : }
1078 : :
1079 : 44565 : return o << "[" << (now2 ? now2 : "") << "] "
1080 [ - + ]: 44565 : << "(" << getpid () << "/" << tid() << "): ";
1081 : : }
1082 : :
1083 : :
1084 : : // A little class that impersonates an ostream to the extent that it can
1085 : : // take << streaming operations. It batches up the bits into an internal
1086 : : // stringstream until it is destroyed; then flushes to the original ostream.
1087 : : // It adds a timestamp
1088 : : class obatched
1089 : : {
1090 : : private:
1091 : : ostream& o;
1092 : : stringstream stro;
1093 : : static mutex lock;
1094 : : public:
1095 : 44566 : obatched(ostream& oo, bool timestamp_p = true): o(oo)
1096 : : {
1097 [ + + ]: 44566 : if (timestamp_p)
1098 [ + - ]: 44563 : timestamp(stro);
1099 : 44567 : }
1100 : 44552 : ~obatched()
1101 : : {
1102 : 44552 : unique_lock<mutex> do_not_cross_the_streams(obatched::lock);
1103 [ + - ]: 44567 : o << stro.str();
1104 : 44567 : o.flush();
1105 : 44567 : }
1106 : : operator ostream& () { return stro; }
1107 [ - - + - : 34561 : template <typename T> ostream& operator << (const T& t) { stro << t; return stro; }
+ - + - +
- + - + -
- - - - -
- + - + -
+ - + - +
- + - + -
+ - + - +
- + - + -
- - + - +
- + - + -
+ - + - +
- + - - -
+ - + - +
- + - + -
+ - + - -
- + - - -
+ - - - +
- + - + -
- - - - -
- - - - -
- - + - -
- + - + -
- - + - +
- - - + -
- - + - -
- - - + -
+ - + - +
- + - + -
- - - - ]
1108 : : };
1109 : : mutex obatched::lock; // just the one, since cout/cerr iostreams are not thread-safe
1110 : :
1111 : :
1112 : 696 : void reportable_exception::report(ostream& o) const {
1113 [ + - + - ]: 696 : obatched(o) << message << endl;
1114 : 696 : }
1115 : :
1116 : :
1117 : : ////////////////////////////////////////////////////////////////////////
1118 : :
1119 : :
1120 : : // RAII style sqlite prepared-statement holder that matches { } block lifetime
1121 : :
1122 : : struct sqlite_ps
1123 : : {
1124 : : private:
1125 : : sqlite3* db;
1126 : : const string nickname;
1127 : : const string sql;
1128 : : sqlite3_stmt *pp;
1129 : : // for step_timeout()/callback
1130 : : struct timespec ts_start;
1131 : : double ts_timeout;
1132 : :
1133 : : sqlite_ps(const sqlite_ps&); // make uncopyable
1134 : : sqlite_ps& operator=(const sqlite_ps &); // make unassignable
1135 : :
1136 : : public:
1137 [ + - - - ]: 9136 : sqlite_ps (sqlite3* d, const string& n, const string& s): db(d), nickname(n), sql(s) {
1138 : : // tmp_ms_metric tick("sqlite3","prep",nickname);
1139 [ + + ]: 9138 : if (verbose > 4)
1140 [ + - + - : 174 : obatched(clog) << nickname << " prep " << sql << endl;
+ - + - +
- - - ]
1141 [ + - ]: 9138 : int rc = sqlite3_prepare_v2 (db, sql.c_str(), -1 /* to \0 */, & this->pp, NULL);
1142 [ - + ]: 9138 : if (rc != SQLITE_OK)
1143 [ # # # # ]: 0 : throw sqlite_exception(rc, "prepare " + sql);
1144 : 18276 : this->reset_timeout(0.0);
1145 : 0 : }
1146 : :
1147 : 190242 : sqlite_ps& reset()
1148 : : {
1149 [ + - + - : 380470 : tmp_ms_metric tick("sqlite3","reset",nickname);
- + - - ]
1150 [ + - ]: 190228 : sqlite3_reset(this->pp);
1151 : 190244 : return *this;
1152 : 190239 : }
1153 : :
1154 : 218083 : sqlite_ps& bind(int parameter, const string& str)
1155 : : {
1156 [ + + ]: 218083 : if (verbose > 4)
1157 [ + - + - : 196 : obatched(clog) << nickname << " bind " << parameter << "=" << str << endl;
+ - + - +
- + - ]
1158 : 218083 : int rc = sqlite3_bind_text (this->pp, parameter, str.c_str(), -1, SQLITE_TRANSIENT);
1159 [ - + ]: 218082 : if (rc != SQLITE_OK)
1160 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 bind");
1161 : 218082 : return *this;
1162 : : }
1163 : :
1164 : 58496 : sqlite_ps& bind(int parameter, int64_t value)
1165 : : {
1166 [ + + ]: 58496 : if (verbose > 4)
1167 [ + - + - : 64 : obatched(clog) << nickname << " bind " << parameter << "=" << value << endl;
+ - + - +
- + - ]
1168 : 58496 : int rc = sqlite3_bind_int64 (this->pp, parameter, value);
1169 [ - + ]: 58500 : if (rc != SQLITE_OK)
1170 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 bind");
1171 : 58500 : return *this;
1172 : : }
1173 : :
1174 : : sqlite_ps& bind(int parameter)
1175 : : {
1176 : : if (verbose > 4)
1177 : : obatched(clog) << nickname << " bind " << parameter << "=" << "NULL" << endl;
1178 : : int rc = sqlite3_bind_null (this->pp, parameter);
1179 : : if (rc != SQLITE_OK)
1180 : : throw sqlite_exception(rc, "sqlite3 bind");
1181 : : return *this;
1182 : : }
1183 : :
1184 : :
1185 : 117210 : void step_ok_done() {
1186 [ + - + - : 234429 : tmp_ms_metric tick("sqlite3","step_done",nickname);
- + - - ]
1187 [ + - ]: 117219 : int rc = sqlite3_step (this->pp);
1188 [ + + ]: 117236 : if (verbose > 4)
1189 [ + - + - : 128 : obatched(clog) << nickname << " step-ok-done(" << sqlite3_errstr(rc) << ") " << sql << endl;
+ - + - +
- + - + -
+ - ]
1190 [ + + - + ]: 117236 : if (rc != SQLITE_OK && rc != SQLITE_DONE && rc != SQLITE_ROW)
1191 [ # # # # ]: 0 : throw sqlite_exception(rc, "sqlite3 step");
1192 [ + - ]: 117236 : (void) sqlite3_reset (this->pp);
1193 : 117236 : }
1194 : :
1195 : :
1196 : 38826 : int step() {
1197 [ + - + - : 77653 : tmp_ms_metric tick("sqlite3","step",nickname);
- + - - ]
1198 [ + - ]: 38827 : int rc = sqlite3_step (this->pp);
1199 [ + + ]: 38828 : if (verbose > 4)
1200 [ + - + - : 62 : obatched(clog) << nickname << " step(" << sqlite3_errstr(rc) << ") " << sql << endl;
+ - + - +
- + - + -
+ - ]
1201 : 38828 : return rc;
1202 : 38828 : }
1203 : :
1204 : :
1205 : 9164 : void reset_timeout(double s) // set starting point for maximum elapsed time in step_timeouts()
1206 : : {
1207 : 9138 : clock_gettime (CLOCK_MONOTONIC, &this->ts_start);
1208 : 9138 : this->ts_timeout = s;
1209 : : }
1210 : :
1211 : :
1212 : 0 : static int sqlite3_progress_handler_cb (void *param)
1213 : : {
1214 : 0 : sqlite_ps *pp = (sqlite_ps*) param;
1215 : 0 : struct timespec ts_end;
1216 : 0 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
1217 : 0 : double deltas = (ts_end.tv_sec - pp->ts_start.tv_sec) + (ts_end.tv_nsec - pp->ts_start.tv_nsec)/1.e9;
1218 [ # # # # ]: 0 : return (interrupted || (deltas > pp->ts_timeout)); // non-zero => interrupt sqlite operation in progress
1219 : : }
1220 : :
1221 : :
1222 : 42 : int step_timeout() {
1223 : : // Do the same thing as step(), except wrapping it into a timeout
1224 : : // relative to the last reset_timeout() invocation.
1225 : : //
1226 : : // Do this by attaching a progress_handler to the database
1227 : : // connection, for the duration of this operation. It should be a
1228 : : // private connection to the calling thread, so other operations
1229 : : // cannot begin concurrently.
1230 : :
1231 : 42 : sqlite3_progress_handler(this->db, 10000 /* bytecode insns */,
1232 : : & sqlite3_progress_handler_cb, (void*) this);
1233 : 42 : int rc = this->step();
1234 : 42 : sqlite3_progress_handler(this->db, 0, 0, 0); // disable
1235 : 42 : struct timespec ts_end;
1236 : 42 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
1237 : 42 : double deltas = (ts_end.tv_sec - this->ts_start.tv_sec) + (ts_end.tv_nsec - this->ts_start.tv_nsec)/1.e9;
1238 [ - + ]: 42 : if (verbose > 3)
1239 [ # # # # : 0 : obatched(clog) << this->nickname << " progress-delta-final " << deltas << endl;
# # # # ]
1240 : 42 : return rc;
1241 : : }
1242 : :
1243 : :
1244 [ + + + + ]: 18115 : ~sqlite_ps () { sqlite3_finalize (this->pp); }
1245 [ + - + - : 6788 : operator sqlite3_stmt* () { return this->pp; }
+ - + - +
- + - + -
+ - + - +
- ]
1246 : : };
1247 : :
1248 : :
1249 : : ////////////////////////////////////////////////////////////////////////
1250 : :
1251 : :
1252 : : struct sqlite_checkpoint_pb: public periodic_barrier
1253 : : {
1254 : : // NB: don't use sqlite_ps since it can throw exceptions during ctor etc.
1255 : 72 : sqlite_checkpoint_pb(unsigned t, unsigned p):
1256 : 144 : periodic_barrier(t, p) { }
1257 : :
1258 : 36 : void periodic_barrier_work() noexcept
1259 : : {
1260 : 36 : (void) sqlite3_exec (db, "pragma wal_checkpoint(truncate);", NULL, NULL, NULL);
1261 : 36 : }
1262 : : };
1263 : :
1264 : : static periodic_barrier* scan_barrier = 0; // initialized in main()
1265 : :
1266 : :
1267 : : ////////////////////////////////////////////////////////////////////////
1268 : :
1269 : : // RAII style templated autocloser
1270 : :
// Holds a value plus a cleanup function, and calls the function on the
// value when the holder goes out of scope.  The function's result (of
// type Ignore) is discarded.
template <class Payload, class Ignore>
struct defer_dtor
{
public:
  using dtor_fn = Ignore (*) (Payload);

private:
  Payload p;
  dtor_fn fn;

  defer_dtor(const defer_dtor<Payload,Ignore>&) = delete;             // make uncopyable
  defer_dtor& operator=(const defer_dtor<Payload,Ignore> &) = delete; // make unassignable

public:
  defer_dtor(Payload _p, dtor_fn _fn)
    : p(_p), fn(_fn)
  {
  }

  ~defer_dtor()
  {
    (void) fn(p);
  }
};
1289 : :
1290 : :
1291 : :
1292 : : ////////////////////////////////////////////////////////////////////////
1293 : :
1294 : :
// Return a copy of STR keeping only alphanumeric characters and the
// punctuation  / . , _ :  and dropping every other byte.  conninfo()
// uses this to sanitize client-supplied HTTP header values before they
// are written to the log.
static string
header_censor(const string& str)
{
  string y;
  for (auto&& x : str)
    {
      // Header bytes are untrusted and may have the high bit set; the
      // <cctype> functions are only defined for unsigned char values
      // (and EOF), so convert before classifying.
      if (isalnum(static_cast<unsigned char>(x))
          || x == '/' || x == '.' || x == ',' || x == '_' || x == ':')
        y += x;
    }
  return y;
}
1306 : :
1307 : :
1308 : : static string
1309 : 3347 : conninfo (struct MHD_Connection * conn)
1310 : : {
1311 : 3347 : char hostname[256]; // RFC1035
1312 : 3347 : char servname[256];
1313 : 3347 : int sts = -1;
1314 : :
1315 [ - + ]: 3347 : if (conn == 0)
1316 : 0 : return "internal";
1317 : :
1318 : : /* Look up client address data. */
1319 : 3347 : const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
1320 : : MHD_CONNECTION_INFO_CLIENT_ADDRESS);
1321 [ + - ]: 3347 : struct sockaddr *so = u ? u->client_addr : 0;
1322 : :
1323 [ + - - + ]: 3347 : if (so && so->sa_family == AF_INET) {
1324 : 0 : sts = getnameinfo (so, sizeof (struct sockaddr_in),
1325 : : hostname, sizeof (hostname),
1326 : : servname, sizeof (servname),
1327 : : NI_NUMERICHOST | NI_NUMERICSERV);
1328 [ + - ]: 3347 : } else if (so && so->sa_family == AF_INET6) {
1329 : 3347 : struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
1330 [ + - + - : 3347 : if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
- + ]
1331 : 3347 : struct sockaddr_in addr4;
1332 : 3347 : memset (&addr4, 0, sizeof(addr4));
1333 : 3347 : addr4.sin_family = AF_INET;
1334 : 3347 : addr4.sin_port = addr6->sin6_port;
1335 : 3347 : memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
1336 : 3347 : sts = getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
1337 : : hostname, sizeof (hostname),
1338 : : servname, sizeof (servname),
1339 : : NI_NUMERICHOST | NI_NUMERICSERV);
1340 : : } else {
1341 : 0 : sts = getnameinfo (so, sizeof (struct sockaddr_in6),
1342 : : hostname, sizeof (hostname),
1343 : : servname, sizeof (servname),
1344 : : NI_NUMERICHOST | NI_NUMERICSERV);
1345 : : }
1346 : : }
1347 : :
1348 [ - + ]: 3347 : if (sts != 0) {
1349 : 0 : hostname[0] = servname[0] = '\0';
1350 : : }
1351 : :
1352 : : // extract headers relevant to administration
1353 [ - + ]: 3347 : const char* user_agent = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
1354 [ + + ]: 3347 : const char* x_forwarded_for = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
1355 : : // NB: these are untrustworthy, beware if machine-processing log files
1356 : :
1357 [ + - + - : 10041 : return string(hostname) + string(":") + string(servname) +
+ - + - +
- + - - +
- + - + -
+ - + - +
- - - - -
- ]
1358 [ + - + - : 14890 : string(" UA:") + header_censor(string(user_agent)) +
+ - + - +
- - + + +
- + + + -
+ - - -
- ]
1359 [ + - + - : 10053 : string(" XFF:") + header_censor(string(x_forwarded_for));
+ - + + +
+ - - ]
1360 : : }
1361 : :
1362 : :
1363 : :
1364 : : ////////////////////////////////////////////////////////////////////////
1365 : :
1366 : : /* Wrapper for MHD_add_response_header that logs an error if we
1367 : : couldn't add the specified header. */
1368 : : static void
1369 : 13031 : add_mhd_response_header (struct MHD_Response *r,
1370 : : const char *h, const char *v)
1371 : : {
1372 [ - + ]: 13031 : if (MHD_add_response_header (r, h, v) == MHD_NO)
1373 [ # # # # : 0 : obatched(clog) << "Error: couldn't add '" << h << "' header" << endl;
# # ]
1374 : 13031 : }
1375 : :
1376 : : static void
1377 : 2126 : add_mhd_last_modified (struct MHD_Response *resp, time_t mtime)
1378 : : {
1379 : 2126 : struct tm now;
1380 : 2126 : struct tm *nowp = gmtime_r (&mtime, &now);
1381 [ + - ]: 2126 : if (nowp != NULL)
1382 : : {
1383 : 2126 : char datebuf[80];
1384 : 2126 : size_t rc = strftime (datebuf, sizeof (datebuf), "%a, %d %b %Y %T GMT",
1385 : : nowp);
1386 [ + - ]: 2126 : if (rc > 0 && rc < sizeof (datebuf))
1387 : 2126 : add_mhd_response_header (resp, "Last-Modified", datebuf);
1388 : : }
1389 : :
1390 : 2126 : add_mhd_response_header (resp, "Cache-Control", "public");
1391 : 2126 : }
1392 : :
1393 : : // quote all questionable characters of str for safe passage through a sh -c expansion.
// Return a copy of STR in which every character other than an
// alphanumeric or '/' is preceded by a backslash, for safe passage
// through a sh -c expansion.
static string
shell_escape(const string& str)
{
  string y;
  for (auto&& x : str)
    {
      // The <cctype> functions are only defined for unsigned char values
      // (and EOF); plain char may be negative for bytes >= 0x80.
      if (! isalnum(static_cast<unsigned char>(x)) && x != '/')
        y += "\\";
      y += x;
    }
  return y;
}
1406 : :
1407 : :
1408 : : // PR25548: Perform POSIX / RFC3986 style path canonicalization on the input string.
1409 : : //
1410 : : // Namely:
1411 : : // // -> /
1412 : : // /foo/../ -> /
1413 : : // /./ -> /
1414 : : //
1415 : : // This mapping is done on dwarf-side source path names, which may
1416 : : // include these constructs, so we can deal with debuginfod clients
1417 : : // that accidentally canonicalize the paths.
1418 : : //
1419 : : // realpath(3) is close but not quite right, because it also resolves
1420 : : // symbolic links. Symlinks at the debuginfod server have nothing to
1421 : : // do with the build-time symlinks, thus they must not be considered.
1422 : : //
1423 : : // see also curl Curl_dedotdotify() aka RFC3986, which we mostly follow here
1424 : : // see also libc __realpath()
1425 : : // see also llvm llvm::sys::path::remove_dots()
static string
canon_pathname (const string& input)
{
  string rest = input; // 5.2.4 (1): the part of the input not yet consumed
  string out;          // the canonicalized result so far

  // True iff the unconsumed input begins with the LEN-character PREFIX.
  auto begins = [&rest] (const char* prefix, string::size_type len)
    { return rest.compare(0, len, prefix) == 0; };

  while (! rest.empty())
    {
      // 5.2.4 (2) A: drop a leading "../" or "./"
      if (begins("../", 3))
        rest.erase(0, 3);
      else if (begins("./", 2))
        rest.erase(0, 2);

      // 5.2.4 (2) B: "/./" collapses to "/"
      else if (begins("/./", 3))
        rest.erase(0, 2);
      else if (rest == "/.")
        rest.clear(); // no need to handle "/." complete-path-segment case; we're dealing with file names

      // 5.2.4 (2) C: "/../" collapses to "/" and removes the last output segment
      else if (begins("/../", 4))
        {
          rest.erase(0, 3);
          const string::size_type last_slash = out.rfind('/');
          if (last_slash != string::npos)
            out.erase(last_slash);
          else
            out.clear();
        }
      else if (rest == "/..")
        rest.clear(); // no need to handle "/.." complete-path-segment case; we're dealing with file names

      // 5.2.4 (2) D
      // no need to handle these cases; we're dealing with file names
      else if (rest == "." || rest == "..")
        rest.clear();

      // POSIX special: map // to /
      else if (begins("//", 2))
        rest.erase(0, 1);

      // 5.2.4 (2) E: move one path segment, with its leading slash if
      // any, from the input to the output
      else
        {
          const string::size_type search_from = (rest[0] == '/') ? 1 : 0; // skip first slash
          const string::size_type next_slash = rest.find('/', search_from);
          out.append(rest, 0, next_slash);
          if (next_slash == string::npos)
            rest.clear();
          else
            rest.erase(0, next_slash);
        }
    }

  return out;
}
1481 : :
1482 : :
1483 : : // Estimate available free space for a given filesystem via statfs(2).
1484 : : // Return true if the free fraction is known to be smaller than the
1485 : : // given minimum percentage. Also update a related metric.
1486 : 2972 : bool statfs_free_enough_p(const string& path, const string& label, long minfree = 0)
1487 : : {
1488 : 2972 : struct statfs sfs;
1489 : 2972 : int rc = statfs(path.c_str(), &sfs);
1490 [ + + ]: 2972 : if (rc == 0)
1491 : : {
1492 : 2898 : double s = (double) sfs.f_bavail / (double) sfs.f_blocks;
1493 [ + - + - : 5796 : set_metric("filesys_free_ratio","purpose",label, s);
- + - - ]
1494 : 2898 : return ((s * 100.0) < minfree);
1495 : : }
1496 : : return false;
1497 : : }
1498 : :
1499 : :
1500 : :
1501 : : // A map-like class that owns a cache of file descriptors (indexed by
1502 : : // file / content names).
1503 : : //
1504 : : // If only it could use fd's instead of file names ... but we can't
1505 : : // dup(2) to create independent descriptors for the same unlinked
1506 : : // files, so would have to use some goofy linux /proc/self/fd/%d
1507 : : // hack such as the following
1508 : :
1509 : : #if 0
// Open a second read-only descriptor for the file behind FD by going
// through its /proc/self/fd/ entry.  Returns the new descriptor, or -1
// if the path could not be formatted, the open failed, or this is not
// a linux build.
int superdup(int fd)
{
#ifndef __linux__
  return -1;
#else
  char *proc_link = NULL;
  const bool formatted = asprintf(&proc_link, "/proc/self/fd/%d", fd) >= 0;
  const int duplicate = formatted ? open(proc_link, O_RDONLY) : -1;
  free (proc_link);
  return duplicate;
#endif
}
1526 : : #endif
1527 : :
1528 : : class libarchive_fdcache
1529 : : {
1530 : : private:
1531 : : mutex fdcache_lock;
1532 : :
1533 : : typedef pair<string,string> key; // archive, entry
1534 [ + - ]: 282 : struct fdcache_entry
1535 : : {
1536 : : string fd; // file name (probably in $TMPDIR), not an actual open fd (EMFILE)
1537 : : double fd_size_mb; // slightly rounded up megabytes
1538 : : time_t freshness; // when was this entry created or requested last
1539 : : unsigned request_count; // how many requests were made; or 0=prefetch only
1540 : : double latency; // how many seconds it took to extract the file
1541 : : };
1542 : :
1543 : : map<key,fdcache_entry> entries; // optimized for lookup
1544 : : time_t last_cleaning;
1545 : : long max_mbs;
1546 : :
1547 : : public:
1548 : 362 : void set_metrics()
1549 : : {
1550 : 362 : double fdcache_mb = 0.0;
1551 : 362 : double prefetch_mb = 0.0;
1552 : 362 : unsigned fdcache_count = 0;
1553 : 362 : unsigned prefetch_count = 0;
1554 [ + + ]: 3474 : for (auto &i : entries) {
1555 [ + + ]: 3112 : if (i.second.request_count) {
1556 : 3018 : fdcache_mb += i.second.fd_size_mb;
1557 : 3018 : fdcache_count ++;
1558 : : } else {
1559 : 94 : prefetch_mb += i.second.fd_size_mb;
1560 : 94 : prefetch_count ++;
1561 : : }
1562 : : }
1563 [ + - ]: 362 : set_metric("fdcache_bytes", fdcache_mb*1024.0*1024.0);
1564 [ + - ]: 362 : set_metric("fdcache_count", fdcache_count);
1565 [ + - ]: 362 : set_metric("fdcache_prefetch_bytes", prefetch_mb*1024.0*1024.0);
1566 [ + - ]: 362 : set_metric("fdcache_prefetch_count", prefetch_count);
1567 : 362 : }
1568 : :
1569 : 284 : void intern(const string& a, const string& b, string fd, off_t sz,
1570 : : bool requested_p, double lat)
1571 : : {
1572 : 284 : {
1573 : 284 : unique_lock<mutex> lock(fdcache_lock);
1574 : 284 : time_t now = time(NULL);
1575 : : // there is a chance it's already in here, just wasn't found last time
1576 : : // if so, there's nothing to do but count our luck
1577 [ + - ]: 284 : auto i = entries.find(make_pair(a,b));
1578 [ + + ]: 284 : if (i != entries.end())
1579 : : {
1580 [ + - + - : 4 : inc_metric("fdcache_op_count","op","redundant_intern");
+ - + - -
+ + - - -
- - ]
1581 [ + - ]: 2 : if (requested_p) i->second.request_count ++; // repeat prefetch doesn't count
1582 : 2 : i->second.freshness = now;
1583 : : // We need to nuke the temp file, since interning passes
1584 : : // responsibility over the path to this structure. It is
1585 : : // possible that the caller still has an fd open, but that's
1586 : : // OK.
1587 : 2 : unlink (fd.c_str());
1588 : 2 : return;
1589 : : }
1590 : 282 : double mb = (sz+65535)/1048576.0; // round up to 64K block
1591 : 282 : fdcache_entry n = { .fd=fd, .fd_size_mb=mb,
1592 : 282 : .freshness=now, .request_count = requested_p?1U:0U,
1593 [ + - + + ]: 282 : .latency=lat};
1594 [ + - + - : 282 : entries.insert(make_pair(make_pair(a,b),n));
+ - ]
1595 : :
1596 [ + + ]: 282 : if (requested_p)
1597 [ + - + - : 472 : inc_metric("fdcache_op_count","op","enqueue");
+ - + - -
+ - + - -
- - ]
1598 : : else
1599 [ + - + - : 138 : inc_metric("fdcache_op_count","op","prefetch_enqueue");
+ - + - -
+ + - - -
- - ]
1600 : :
1601 [ + + ]: 282 : if (verbose > 3)
1602 [ + - + - : 504 : obatched(clog) << "fdcache interned a=" << a << " b=" << b
- - ]
1603 [ + - + - : 168 : << " fd=" << fd << " mb=" << mb << " front=" << requested_p
+ - + - +
- + - + -
+ - ]
1604 [ + - + - : 168 : << " latency=" << lat << endl;
+ - ]
1605 : :
1606 [ + - ]: 282 : set_metrics();
1607 : 284 : }
1608 : :
1609 : : // NB: we age the cache at lookup time too
1610 [ + - - + : 282 : if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp))
- + ]
1611 : : {
1612 [ # # # # : 0 : inc_metric("fdcache_op_count","op","emerg-flush");
# # # # #
# # # #
# ]
1613 [ # # ]: 0 : obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl;
1614 : 0 : this->limit(0); // emergency flush
1615 : : }
1616 : : else // age cache normally
1617 : 282 : this->limit(max_mbs);
1618 : : }
1619 : :
1620 : 1014 : int lookup(const string& a, const string& b)
1621 : : {
1622 : 1014 : int fd = -1;
1623 : 1014 : {
1624 : 1014 : unique_lock<mutex> lock(fdcache_lock);
1625 [ + - ]: 1014 : auto i = entries.find(make_pair(a,b));
1626 [ + + ]: 1014 : if (i != entries.end())
1627 : : {
1628 [ + + ]: 772 : if (i->second.request_count == 0) // was a prefetch!
1629 : : {
1630 [ + - + - ]: 12 : inc_metric("fdcache_prefetch_saved_milliseconds_count");
1631 [ + - + - ]: 24 : add_metric("fdcache_prefetch_saved_milliseconds_sum", i->second.latency*1000.);
1632 : : }
1633 : 772 : i->second.request_count ++;
1634 : 772 : i->second.freshness = time(NULL);
1635 : : // brag about our success
1636 [ + - + - : 1544 : inc_metric("fdcache_op_count","op","prefetch_access"); // backward compat
+ - + - -
+ - + - -
- - ]
1637 [ + - + - ]: 772 : inc_metric("fdcache_saved_milliseconds_count");
1638 [ + - + - ]: 772 : add_metric("fdcache_saved_milliseconds_sum", i->second.latency*1000.);
1639 [ + - ]: 1014 : fd = open(i->second.fd.c_str(), O_RDONLY);
1640 : : }
1641 : 0 : }
1642 : :
1643 [ + + ]: 1014 : if (fd >= 0)
1644 [ + - + - : 1544 : inc_metric("fdcache_op_count","op","lookup_hit");
+ - - + -
+ - - -
- ]
1645 : : else
1646 [ + - + - : 484 : inc_metric("fdcache_op_count","op","lookup_miss");
+ - - + -
+ - - -
- ]
1647 : :
1648 : : // NB: no need to age the cache after just a lookup
1649 : :
1650 : 1014 : return fd;
1651 : : }
1652 : :
1653 : 208 : int probe(const string& a, const string& b) // just a cache residency check - don't modify state, don't open
1654 : : {
1655 : 208 : unique_lock<mutex> lock(fdcache_lock);
1656 [ + - ]: 208 : auto i = entries.find(make_pair(a,b));
1657 [ + + ]: 208 : if (i != entries.end()) {
1658 [ + - + - : 56 : inc_metric("fdcache_op_count","op","probe_hit");
+ - + - -
+ - + - -
- - ]
1659 : 28 : return true;
1660 : : } else {
1661 [ + - + - : 360 : inc_metric("fdcache_op_count","op","probe_miss");
+ - + - -
+ - + - -
- - ]
1662 : 180 : return false;
1663 : : }
1664 : 208 : }
1665 : :
1666 : 0 : void clear(const string& a, const string& b)
1667 : : {
1668 : 0 : unique_lock<mutex> lock(fdcache_lock);
1669 [ # # ]: 0 : auto i = entries.find(make_pair(a,b));
1670 [ # # ]: 0 : if (i != entries.end()) {
1671 [ # # # # : 0 : inc_metric("fdcache_op_count","op",
# # # # #
# # # # #
# # # # ]
1672 [ # # ]: 0 : i->second.request_count > 0 ? "clear" : "prefetch_clear");
1673 : 0 : unlink (i->second.fd.c_str());
1674 : 0 : entries.erase(i);
1675 [ # # ]: 0 : set_metrics();
1676 : 0 : return;
1677 : : }
1678 : 0 : }
1679 : :
1680 : 442 : void limit(long maxmbs, bool metrics_p = true)
1681 : : {
1682 : 442 : time_t now = time(NULL);
1683 : :
1684 : : // avoid overly frequent limit operations
1685 [ + + + + ]: 442 : if (maxmbs > 0 && (now - this->last_cleaning) < 10) // probably not worth parametrizing
1686 : 282 : return;
1687 : 160 : this->last_cleaning = now;
1688 : :
1689 [ + + + - ]: 160 : if (verbose > 3 && (this->max_mbs != maxmbs))
1690 [ + - + - ]: 192 : obatched(clog) << "fdcache limited to maxmbs=" << maxmbs << endl;
1691 : :
1692 : 160 : unique_lock<mutex> lock(fdcache_lock);
1693 : :
1694 : 160 : this->max_mbs = maxmbs;
1695 : 160 : double total_mb = 0.0;
1696 : :
1697 : 160 : map<double, pair<string,string>> sorted_entries;
1698 [ + + ]: 442 : for (auto &i: entries)
1699 : : {
1700 : 282 : total_mb += i.second.fd_size_mb;
1701 : :
1702 : : // need a scalar quantity that combines these inputs in a sensible way:
1703 : : //
1704 : : // 1) freshness of this entry (last time it was accessed)
1705 : : // 2) size of this entry
1706 : : // 3) number of times it has been accessed (or if just prefetched with 0 accesses)
1707 : : // 4) latency it required to extract
1708 : : //
1709 : : // The lower the "score", the earlier garbage collection will
1710 : : // nuke it, so to prioritize entries for preservation, the
1711 : : // score should be higher, and vice versa.
1712 : 282 : time_t factor_1_freshness = (now - i.second.freshness); // seconds
1713 : 282 : double factor_2_size = i.second.fd_size_mb; // megabytes
1714 : 282 : unsigned factor_3_accesscount = i.second.request_count; // units
1715 : 282 : double factor_4_latency = i.second.latency; // seconds
1716 : :
1717 : : #if 0
1718 : : double score = - factor_1_freshness; // simple LRU
1719 : : #endif
1720 : :
1721 [ + + ]: 282 : double score = 0.
1722 : 282 : - log1p(factor_1_freshness) // penalize old file
1723 : 282 : - log1p(factor_2_size) // penalize large file
1724 : 282 : + factor_4_latency * factor_3_accesscount; // reward slow + repeatedly read files
1725 : :
1726 [ + + ]: 282 : if (verbose > 4)
1727 [ + - ]: 8 : obatched(clog) << "fdcache scored score=" << score
1728 [ + - + - ]: 8 : << " a=" << i.first.first << " b=" << i.first.second
1729 [ + - + - : 12 : << " f1=" << factor_1_freshness << " f2=" << factor_2_size
+ - + - +
- + - +
- ]
1730 [ + - + - : 4 : << " f3=" << factor_3_accesscount << " f4=" << factor_4_latency
+ - + - +
- ]
1731 : 4 : << endl;
1732 : :
1733 [ + - + - ]: 564 : sorted_entries.insert(make_pair(score, i.first));
1734 : : }
1735 : :
1736 : 160 : unsigned cleaned = 0;
1737 : 160 : unsigned entries_original = entries.size();
1738 : 160 : double cleaned_score_min = DBL_MAX;
1739 : 160 : double cleaned_score_max = DBL_MIN;
1740 : :
1741 : : // drop as many entries[] as needed to bring total mb down to the threshold
1742 [ + + ]: 442 : for (auto &i: sorted_entries) // in increasing score order!
1743 : : {
1744 [ - + ]: 282 : if (this->max_mbs > 0 // if this is not a "clear entire table"
1745 [ # # ]: 0 : && total_mb < this->max_mbs) // we've cleared enough to meet threshold
1746 : : break; // stop clearing
1747 : :
1748 [ - + ]: 282 : auto j = entries.find(i.second);
1749 [ - + ]: 282 : if (j == entries.end())
1750 : 0 : continue; // should not happen
1751 : :
1752 [ + + ]: 282 : if (cleaned == 0)
1753 : 36 : cleaned_score_min = i.first;
1754 : 282 : cleaned++;
1755 : 282 : cleaned_score_max = i.first;
1756 : :
1757 [ + + ]: 282 : if (verbose > 3)
1758 [ + - + - ]: 504 : obatched(clog) << "fdcache evicted score=" << i.first
1759 [ + - + - ]: 336 : << " a=" << i.second.first << " b=" << i.second.second
1760 [ + - + - : 504 : << " fd=" << j->second.fd << " mb=" << j->second.fd_size_mb
+ - + - +
- + - ]
1761 [ + - + - : 168 : << " rq=" << j->second.request_count << " lat=" << j->second.latency
+ - + - ]
1762 [ + - + - : 168 : << " fr=" << (now - j->second.freshness)
+ - ]
1763 : 168 : << endl;
1764 [ - + ]: 282 : if (metrics_p)
1765 [ # # # # : 0 : inc_metric("fdcache_op_count","op","evict");
# # # # #
# # # # #
# # ]
1766 : :
1767 : 282 : total_mb -= j->second.fd_size_mb;
1768 : 282 : unlink (j->second.fd.c_str());
1769 : 282 : entries.erase(j);
1770 : : }
1771 : :
1772 [ + + ]: 160 : if (metrics_p)
1773 [ + - + - : 160 : inc_metric("fdcache_op_count","op","evict_cycle");
+ - + - -
+ - + - -
- - ]
1774 : :
1775 [ + - + + ]: 160 : if (verbose > 1 && cleaned > 0)
1776 : : {
1777 [ + - + - : 108 : obatched(clog) << "fdcache evicted num=" << cleaned << " of=" << entries_original
+ - + - ]
1778 [ + - + - : 36 : << " min=" << cleaned_score_min << " max=" << cleaned_score_max
+ - + - +
- ]
1779 : 36 : << endl;
1780 : : }
1781 : :
1782 [ + + + - ]: 160 : if (metrics_p) set_metrics();
1783 : 160 : }
1784 : :
1785 : :
1786 : 80 : ~libarchive_fdcache()
1787 : : {
1788 : : // unlink any fdcache entries in $TMPDIR
1789 : : // don't update metrics; those globals may be already destroyed
1790 : 80 : limit(0, false);
1791 : 80 : }
1792 : : };
1793 : : static libarchive_fdcache fdcache;
1794 : :
1795 : : /* Search ELF_FD for an ELF/DWARF section with name SECTION.
1796 : : If found copy the section to a temporary file and return
1797 : : its file descriptor, otherwise return -1.
1798 : :
1799 : : The temporary file's mtime will be set to PARENT_MTIME.
1800 : : B_SOURCE should be a description of the parent file suitable
1801 : : for printing to the log. */
1802 : :
1803 : : static int
1804 : 14 : extract_section (int elf_fd, int64_t parent_mtime,
1805 : : const string& b_source, const string& section,
1806 : : const timespec& extract_begin)
1807 : : {
1808 : : /* Search the fdcache. */
1809 : 14 : struct stat fs;
1810 : 14 : int fd = fdcache.lookup (b_source, section);
1811 [ - + ]: 14 : if (fd >= 0)
1812 : : {
1813 [ # # ]: 0 : if (fstat (fd, &fs) != 0)
1814 : : {
1815 [ # # ]: 0 : if (verbose)
1816 [ # # ]: 0 : obatched (clog) << "cannot fstate fdcache "
1817 [ # # # # : 0 : << b_source << " " << section << endl;
# # ]
1818 : 0 : close (fd);
1819 : 0 : return -1;
1820 : : }
1821 [ # # ]: 0 : if ((int64_t) fs.st_mtime != parent_mtime)
1822 : : {
1823 [ # # ]: 0 : if (verbose)
1824 [ # # ]: 0 : obatched(clog) << "mtime mismatch for "
1825 [ # # # # : 0 : << b_source << " " << section << endl;
# # ]
1826 : 0 : close (fd);
1827 : 0 : return -1;
1828 : : }
1829 : : /* Success. */
1830 : : return fd;
1831 : : }
1832 : :
1833 : 14 : Elf *elf = elf_begin (elf_fd, ELF_C_READ_MMAP_PRIVATE, NULL);
1834 [ - + ]: 14 : if (elf == NULL)
1835 : : return -1;
1836 : :
1837 : : /* Try to find the section and copy the contents into a separate file. */
1838 : 14 : try
1839 : : {
1840 : 14 : size_t shstrndx;
1841 [ + - ]: 14 : int rc = elf_getshdrstrndx (elf, &shstrndx);
1842 [ - + ]: 14 : if (rc < 0)
1843 [ # # # # ]: 0 : throw elfutils_exception (rc, "getshdrstrndx");
1844 : :
1845 : : Elf_Scn *scn = NULL;
1846 : 482 : while (true)
1847 : : {
1848 [ + - ]: 248 : scn = elf_nextscn (elf, scn);
1849 [ + - ]: 248 : if (scn == NULL)
1850 : : break;
1851 : 248 : GElf_Shdr shdr_storage;
1852 [ + - ]: 248 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
1853 [ + - ]: 248 : if (shdr == NULL)
1854 : : break;
1855 : :
1856 [ + - ]: 248 : const char *scn_name = elf_strptr (elf, shstrndx, shdr->sh_name);
1857 [ + - ]: 248 : if (scn_name == NULL)
1858 : : break;
1859 [ + + ]: 248 : if (scn_name == section)
1860 : : {
1861 : 14 : Elf_Data *data = NULL;
1862 : :
1863 : : /* We found the desired section. */
1864 [ + - ]: 14 : data = elf_rawdata (scn, NULL);
1865 [ - + ]: 14 : if (data == NULL)
1866 [ # # # # : 0 : throw elfutils_exception (elf_errno (), "elfraw_data");
# # ]
1867 [ + + ]: 14 : if (data->d_buf == NULL)
1868 : : {
1869 [ + - + - ]: 12 : obatched(clog) << "section " << section
1870 [ + - + - ]: 6 : << " is empty" << endl;
1871 : 6 : break;
1872 : : }
1873 : :
1874 : : /* Create temporary file containing the section. */
1875 : 8 : char *tmppath = NULL;
1876 : 8 : rc = asprintf (&tmppath, "%s/debuginfod-section.XXXXXX", tmpdir.c_str());
1877 [ - + ]: 8 : if (rc < 0)
1878 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
1879 : 8 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
1880 [ + - ]: 8 : fd = mkstemp (tmppath);
1881 [ - + ]: 8 : if (fd < 0)
1882 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
1883 : :
1884 [ + - ]: 8 : ssize_t res = write_retry (fd, data->d_buf, data->d_size);
1885 [ + - - + ]: 8 : if (res < 0 || (size_t) res != data->d_size) {
1886 [ # # ]: 0 : close (fd);
1887 : 0 : unlink (tmppath);
1888 [ # # # # ]: 0 : throw libc_exception (errno, "cannot write to temporary file");
1889 : : }
1890 : :
1891 : : /* Set mtime to be the same as the parent file's mtime. */
1892 : 8 : struct timespec tvs[2];
1893 [ - + ]: 8 : if (fstat (elf_fd, &fs) != 0) {
1894 [ # # ]: 0 : close (fd);
1895 : 0 : unlink (tmppath);
1896 [ # # # # ]: 0 : throw libc_exception (errno, "cannot fstat file");
1897 : : }
1898 : :
1899 : 8 : tvs[0].tv_sec = 0;
1900 : 8 : tvs[0].tv_nsec = UTIME_OMIT;
1901 : 8 : tvs[1] = fs.st_mtim;
1902 : 8 : (void) futimens (fd, tvs);
1903 : :
1904 : 8 : struct timespec extract_end;
1905 : 8 : clock_gettime (CLOCK_MONOTONIC, &extract_end);
1906 : 8 : double extract_time = (extract_end.tv_sec - extract_begin.tv_sec)
1907 : 8 : + (extract_end.tv_nsec - extract_begin.tv_nsec)/1.e9;
1908 : :
1909 : : /* Add to fdcache. */
1910 [ + - + - ]: 8 : fdcache.intern (b_source, section, tmppath, data->d_size, true, extract_time);
1911 : 8 : break;
1912 : 14 : }
1913 : 234 : }
1914 : : }
1915 [ - - ]: 0 : catch (const reportable_exception &e)
1916 : : {
1917 [ - - ]: 0 : e.report (clog);
1918 [ - - ]: 0 : close (fd);
1919 : 0 : fd = -1;
1920 : 0 : }
1921 : :
1922 : 14 : elf_end (elf);
1923 : : return fd;
1924 : : }
1925 : :
1926 : : static struct MHD_Response*
1927 : 1126 : handle_buildid_f_match (bool internal_req_t,
1928 : : int64_t b_mtime,
1929 : : const string& b_source0,
1930 : : const string& section,
1931 : : int *result_fd)
1932 : : {
1933 : 1126 : (void) internal_req_t; // ignored
1934 : :
1935 : 1126 : struct timespec extract_begin;
1936 : 1126 : clock_gettime (CLOCK_MONOTONIC, &extract_begin);
1937 : :
1938 : 1126 : int fd = open(b_source0.c_str(), O_RDONLY);
1939 [ - + ]: 1126 : if (fd < 0)
1940 [ # # # # : 0 : throw libc_exception (errno, string("open ") + b_source0);
# # # # ]
1941 : :
1942 : : // NB: use manual close(2) in error case instead of defer_dtor, because
1943 : : // in the normal case, we want to hand the fd over to libmicrohttpd for
1944 : : // file transfer.
1945 : :
1946 : 1126 : struct stat s;
1947 : 1126 : int rc = fstat(fd, &s);
1948 [ - + ]: 1126 : if (rc < 0)
1949 : : {
1950 : 0 : close(fd);
1951 [ # # # # : 0 : throw libc_exception (errno, string("fstat ") + b_source0);
# # # # ]
1952 : : }
1953 : :
1954 [ - + ]: 1126 : if ((int64_t) s.st_mtime != b_mtime)
1955 : : {
1956 [ # # ]: 0 : if (verbose)
1957 [ # # # # ]: 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
1958 : 0 : close(fd);
1959 : 0 : return 0;
1960 : : }
1961 : :
1962 [ + + ]: 1126 : if (!section.empty ())
1963 : : {
1964 : 6 : int scn_fd = extract_section (fd, s.st_mtime, b_source0, section, extract_begin);
1965 : 6 : close (fd);
1966 : :
1967 [ + + ]: 6 : if (scn_fd >= 0)
1968 : 4 : fd = scn_fd;
1969 : : else
1970 : : {
1971 [ + - ]: 2 : if (verbose)
1972 [ + - ]: 6 : obatched (clog) << "cannot find section " << section
1973 [ + - + - : 2 : << " for " << b_source0 << endl;
+ - ]
1974 : 2 : return 0;
1975 : : }
1976 : :
1977 : 4 : rc = fstat(fd, &s);
1978 [ - + ]: 4 : if (rc < 0)
1979 : : {
1980 : 0 : close (fd);
1981 [ # # # # : 0 : throw libc_exception (errno, string ("fstat ") + b_source0
# # # # #
# # # #
# ]
1982 [ # # # # : 0 : + string (" ") + section);
# # # # #
# ]
1983 : : }
1984 : : }
1985 : :
1986 : 1124 : struct MHD_Response* r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
1987 [ + - + - : 2248 : inc_metric ("http_responses_total","result","file");
+ - - + -
+ - - -
- ]
1988 [ - + ]: 1124 : if (r == 0)
1989 : : {
1990 [ # # ]: 0 : if (verbose)
1991 [ # # ]: 0 : obatched(clog) << "cannot create fd-response for " << b_source0
1992 [ # # # # : 0 : << " section=" << section << endl;
# # ]
1993 : 0 : close(fd);
1994 : : }
1995 : : else
1996 : : {
1997 : 1124 : add_mhd_response_header (r, "Content-Type", "application/octet-stream");
1998 [ + - ]: 1124 : add_mhd_response_header (r, "X-DEBUGINFOD-SIZE",
1999 : 1124 : to_string(s.st_size).c_str());
2000 : 1124 : add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source0.c_str());
2001 : 1124 : add_mhd_last_modified (r, s.st_mtime);
2002 [ + - ]: 1124 : if (verbose > 1)
2003 [ + - + - : 2248 : obatched(clog) << "serving file " << b_source0 << " section=" << section << endl;
+ - + - ]
2004 : : /* libmicrohttpd will close it. */
2005 [ - + ]: 1124 : if (result_fd)
2006 : 1124 : *result_fd = fd;
2007 : : }
2008 : :
2009 : : return r;
2010 : : }
2011 : :
2012 : :
2013 : : #ifdef USE_LZMA
2014 : : struct lzma_exception: public reportable_exception
2015 : : {
2016 : 0 : lzma_exception(int rc, const string& msg):
2017 : : // liblzma doesn't have a lzma_ret -> string conversion function, so just
2018 : : // report the value.
2019 [ # # # # : 0 : reportable_exception(string ("lzma error: ") + msg + ": error " + to_string(rc)) {
# # # # #
# # # # #
# # # # ]
2020 [ # # # # : 0 : inc_metric("error_count","lzma",to_string(rc));
# # # # #
# # # #
# ]
2021 [ # # ]: 0 : }
2022 : : };
2023 : :
2024 : : // Neither RPM nor deb files support seeking to a specific file in the package.
2025 : : // Instead, to extract a specific file, we normally need to read the archive
2026 : : // sequentially until we find the file. This is very slow for files at the end
2027 : : // of a large package with lots of files, like kernel debuginfo.
2028 : : //
2029 : : // However, if the compression format used in the archive supports seeking, we
2030 : : // can accelerate this. As of July 2024, xz is the only widely-used format that
2031 : : // supports seeking, and usually only in multi-threaded mode. Luckily, the
2032 : : // kernel-debuginfo package in Fedora and its downstreams, and the
2033 : : // linux-image-*-dbg package in Debian and its downstreams, all happen to use
2034 : : // this.
2035 : : //
2036 : : // The xz format [1] ends with an index of independently compressed blocks in
2037 : : // the stream. In RPM and deb files, the xz stream is the last thing in the
2038 : : // file, so we assume that the xz Stream Footer is at the end of the package
2039 : : // file and do everything relative to that. For each file in the archive, we
2040 : : // remember the size and offset of the file data in the uncompressed xz stream,
2041 : : // then we use the index to seek to that offset when we need that file.
2042 : : //
2043 : : // 1: https://xz.tukaani.org/format/xz-file-format.txt
2044 : :
2045 : : // Return whether an archive supports seeking.
2046 : : static bool
2047 : 530 : is_seekable_archive (const string& rps, struct archive* a)
2048 : : {
2049 : : // Only xz supports seeking.
2050 [ + + ]: 530 : if (archive_filter_code (a, 0) != ARCHIVE_FILTER_XZ)
2051 : : return false;
2052 : :
2053 : 336 : int fd = open (rps.c_str(), O_RDONLY);
2054 [ - + ]: 336 : if (fd < 0)
2055 : : return false;
2056 : 336 : defer_dtor<int,int> fd_closer (fd, close);
2057 : :
2058 : : // Seek to the xz Stream Footer. We assume that it's the last thing in the
2059 : : // file, which is true for RPM and deb files.
2060 : 336 : off_t footer_pos = -LZMA_STREAM_HEADER_SIZE;
2061 [ - + ]: 336 : if (lseek (fd, footer_pos, SEEK_END) == -1)
2062 : : return false;
2063 : :
2064 : : // Decode the Stream Footer.
2065 : : uint8_t footer[LZMA_STREAM_HEADER_SIZE];
2066 : : size_t footer_read = 0;
2067 [ + + ]: 672 : while (footer_read < sizeof (footer))
2068 : : {
2069 [ - + ]: 336 : ssize_t bytes_read = read (fd, footer + footer_read,
2070 : : sizeof (footer) - footer_read);
2071 [ - + ]: 336 : if (bytes_read < 0)
2072 : : {
2073 [ # # ]: 0 : if (errno == EINTR)
2074 : 0 : continue;
2075 : : return false;
2076 : : }
2077 [ - + ]: 336 : if (bytes_read == 0)
2078 : : return false;
2079 : 336 : footer_read += bytes_read;
2080 : : }
2081 : :
2082 : 336 : lzma_stream_flags stream_flags;
2083 : 336 : lzma_ret ret = lzma_stream_footer_decode (&stream_flags, footer);
2084 [ - + ]: 336 : if (ret != LZMA_OK)
2085 : : return false;
2086 : :
2087 : : // Seek to the xz Index.
2088 [ - + ]: 336 : if (lseek (fd, footer_pos - stream_flags.backward_size, SEEK_END) == -1)
2089 : : return false;
2090 : :
2091 : : // Decode the Number of Records in the Index. liblzma doesn't have an API for
2092 : : // this if you don't want to decode the whole Index, so we have to do it
2093 : : // ourselves.
2094 : : //
2095 : : // We need 1 byte for the Index Indicator plus 1-9 bytes for the
2096 : : // variable-length integer Number of Records.
2097 : : uint8_t index[10];
2098 : : size_t index_read = 0;
2099 [ + + ]: 672 : while (index_read == 0) {
2100 [ + - ]: 336 : ssize_t bytes_read = read (fd, index, sizeof (index));
2101 [ - + ]: 336 : if (bytes_read < 0)
2102 : : {
2103 [ # # ]: 0 : if (errno == EINTR)
2104 : 0 : continue;
2105 : : return false;
2106 : : }
2107 [ - + ]: 336 : if (bytes_read == 0)
2108 : : return false;
2109 : 336 : index_read += bytes_read;
2110 : : }
2111 : : // The Index Indicator must be 0.
2112 [ - + ]: 336 : if (index[0] != 0)
2113 : : return false;
2114 : :
2115 : 336 : lzma_vli num_records;
2116 : 336 : size_t pos = 0;
2117 : 336 : size_t in_pos = 1;
2118 : 336 : while (true)
2119 : : {
2120 [ - + ]: 336 : if (in_pos >= index_read)
2121 : : {
2122 [ # # ]: 0 : ssize_t bytes_read = read (fd, index, sizeof (index));
2123 [ # # ]: 0 : if (bytes_read < 0)
2124 : : {
2125 [ # # ]: 0 : if (errno == EINTR)
2126 : 0 : continue;
2127 : : return false;
2128 : : }
2129 [ # # ]: 0 : if (bytes_read == 0)
2130 : : return false;
2131 : 0 : index_read = bytes_read;
2132 : 0 : in_pos = 0;
2133 : : }
2134 : 336 : ret = lzma_vli_decode (&num_records, &pos, index, &in_pos, index_read);
2135 [ - + ]: 336 : if (ret == LZMA_STREAM_END)
2136 : : break;
2137 [ # # ]: 0 : else if (ret != LZMA_OK)
2138 : : return false;
2139 : : }
2140 : :
2141 [ + + ]: 336 : if (verbose > 3)
2142 [ + - + - : 572 : obatched(clog) << rps << " has " << num_records << " xz Blocks" << endl;
+ - + - +
- ]
2143 : :
2144 : : // The file is only seekable if it has more than one Block.
2145 : 336 : return num_records > 1;
2146 : 530 : }
2147 : :
2148 : : // Read the Index at the end of an xz file.
2149 : : static lzma_index*
2150 : 94 : read_xz_index (int fd)
2151 : : {
2152 : 94 : off_t footer_pos = -LZMA_STREAM_HEADER_SIZE;
2153 [ - + ]: 94 : if (lseek (fd, footer_pos, SEEK_END) == -1)
2154 [ # # # # ]: 0 : throw libc_exception (errno, "lseek");
2155 : :
2156 : : uint8_t footer[LZMA_STREAM_HEADER_SIZE];
2157 : : size_t footer_read = 0;
2158 [ + + ]: 188 : while (footer_read < sizeof (footer))
2159 : : {
2160 [ - + ]: 94 : ssize_t bytes_read = read (fd, footer + footer_read,
2161 : : sizeof (footer) - footer_read);
2162 [ - + ]: 94 : if (bytes_read < 0)
2163 : : {
2164 [ # # ]: 0 : if (errno == EINTR)
2165 : 0 : continue;
2166 [ # # # # ]: 0 : throw libc_exception (errno, "read");
2167 : : }
2168 [ - + ]: 94 : if (bytes_read == 0)
2169 [ # # # # ]: 0 : throw reportable_exception ("truncated file");
2170 : 94 : footer_read += bytes_read;
2171 : : }
2172 : :
2173 : 94 : lzma_stream_flags stream_flags;
2174 : 94 : lzma_ret ret = lzma_stream_footer_decode (&stream_flags, footer);
2175 [ - + ]: 94 : if (ret != LZMA_OK)
2176 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_stream_footer_decode");
2177 : :
2178 [ - + ]: 94 : if (lseek (fd, footer_pos - stream_flags.backward_size, SEEK_END) == -1)
2179 [ # # # # ]: 0 : throw libc_exception (errno, "lseek");
2180 : :
2181 : 94 : lzma_stream strm = LZMA_STREAM_INIT;
2182 : 94 : lzma_index* index = NULL;
2183 : 94 : ret = lzma_index_decoder (&strm, &index, UINT64_MAX);
2184 [ - + ]: 94 : if (ret != LZMA_OK)
2185 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_index_decoder");
2186 : 94 : defer_dtor<lzma_stream*,void> strm_ender (&strm, lzma_end);
2187 : :
2188 : 94 : uint8_t in_buf[4096];
2189 : 94 : while (true)
2190 : : {
2191 [ + - ]: 94 : if (strm.avail_in == 0)
2192 : : {
2193 [ + - ]: 94 : ssize_t bytes_read = read (fd, in_buf, sizeof (in_buf));
2194 [ - + ]: 94 : if (bytes_read < 0)
2195 : : {
2196 [ # # ]: 0 : if (errno == EINTR)
2197 : 0 : continue;
2198 [ # # # # ]: 0 : throw libc_exception (errno, "read");
2199 : : }
2200 [ - + ]: 94 : if (bytes_read == 0)
2201 [ # # # # ]: 0 : throw reportable_exception ("truncated file");
2202 : 94 : strm.avail_in = bytes_read;
2203 : 94 : strm.next_in = in_buf;
2204 : : }
2205 : :
2206 : 94 : ret = lzma_code (&strm, LZMA_RUN);
2207 [ - + ]: 94 : if (ret == LZMA_STREAM_END)
2208 : : break;
2209 [ # # ]: 0 : else if (ret != LZMA_OK)
2210 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_code index");
2211 : : }
2212 : :
2213 : 94 : ret = lzma_index_stream_flags (index, &stream_flags);
2214 [ - + ]: 94 : if (ret != LZMA_OK)
2215 : : {
2216 : 0 : lzma_index_end (index, NULL);
2217 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_index_stream_flags");
2218 : : }
2219 : 94 : return index;
2220 : 94 : }
2221 : :
2222 : : static void
2223 : 94 : my_lzma_index_end (lzma_index* index)
2224 : : {
2225 : 94 : lzma_index_end (index, NULL);
2226 : 94 : }
2227 : :
2228 : : static void
2229 : 102 : free_lzma_block_filter_options (lzma_block* block)
2230 : : {
2231 [ + + ]: 510 : for (int i = 0; i < LZMA_FILTERS_MAX; i++)
2232 : : {
2233 : 408 : free (block->filters[i].options);
2234 : 408 : block->filters[i].options = NULL;
2235 : : }
2236 : 102 : }
2237 : :
2238 : : static void
2239 : 94 : free_lzma_block_filters (lzma_block* block)
2240 : : {
2241 [ + - ]: 94 : if (block->filters != NULL)
2242 : : {
2243 : 94 : free_lzma_block_filter_options (block);
2244 : 94 : free (block->filters);
2245 : : }
2246 : 94 : }
2247 : :
2248 : : static void
2249 : 94 : extract_xz_blocks_into_fd (const string& srcpath,
2250 : : int src,
2251 : : int dst,
2252 : : lzma_index_iter* iter,
2253 : : uint64_t offset,
2254 : : uint64_t size)
2255 : : {
2256 : : // Seek to the Block. Seeking from the end using the compressed size from the
2257 : : // footer means we don't need to know where the xz stream starts in the
2258 : : // archive.
2259 [ - + ]: 94 : if (lseek (src,
2260 : 94 : (off_t) iter->block.compressed_stream_offset
2261 : 94 : - (off_t) iter->stream.compressed_size,
2262 : : SEEK_END) == -1)
2263 [ # # # # ]: 0 : throw libc_exception (errno, "lseek");
2264 : :
2265 : 94 : offset -= iter->block.uncompressed_file_offset;
2266 : :
2267 : 94 : lzma_block block{};
2268 : 94 : block.filters = (lzma_filter*) calloc (LZMA_FILTERS_MAX + 1,
2269 : : sizeof (lzma_filter));
2270 [ - + ]: 94 : if (block.filters == NULL)
2271 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate lzma_block filters");
2272 : 94 : defer_dtor<lzma_block*,void> filters_freer (&block, free_lzma_block_filters);
2273 : :
2274 : 94 : uint8_t in_buf[4096];
2275 : 94 : uint8_t out_buf[4096];
2276 : 94 : size_t header_read = 0;
2277 : 94 : bool need_log_extracting = verbose > 3;
2278 : : while (true)
2279 : : {
2280 : : // The first byte of the Block is the encoded Block Header Size. Read the
2281 : : // first byte and whatever extra fits in the buffer.
2282 [ + + ]: 196 : while (header_read == 0)
2283 : : {
2284 [ + - ]: 94 : ssize_t bytes_read = read (src, in_buf, sizeof (in_buf));
2285 [ - + ]: 94 : if (bytes_read < 0)
2286 : : {
2287 [ # # ]: 0 : if (errno == EINTR)
2288 : 0 : continue;
2289 [ # # # # ]: 0 : throw libc_exception (errno, "read");
2290 : : }
2291 [ - + ]: 94 : if (bytes_read == 0)
2292 [ # # # # ]: 0 : throw reportable_exception ("truncated file");
2293 : 94 : header_read += bytes_read;
2294 : : }
2295 : :
2296 : 102 : block.header_size = lzma_block_header_size_decode (in_buf[0]);
2297 : :
2298 : : // If we didn't buffer the whole Block Header earlier, get the rest.
2299 : 102 : eu_static_assert (sizeof (in_buf)
2300 : : >= lzma_block_header_size_decode (UINT8_MAX));
2301 [ - + ]: 102 : while (header_read < block.header_size)
2302 : : {
2303 [ # # ]: 0 : ssize_t bytes_read = read (src, in_buf + header_read,
2304 : : sizeof (in_buf) - header_read);
2305 [ # # ]: 0 : if (bytes_read < 0)
2306 : : {
2307 [ # # ]: 0 : if (errno == EINTR)
2308 : 0 : continue;
2309 [ # # # # ]: 0 : throw libc_exception (errno, "read");
2310 : : }
2311 [ # # ]: 0 : if (bytes_read == 0)
2312 [ # # # # ]: 0 : throw reportable_exception ("truncated file");
2313 : 0 : header_read += bytes_read;
2314 : : }
2315 : :
2316 : : // Decode the Block Header.
2317 : 102 : block.check = iter->stream.flags->check;
2318 : 102 : lzma_ret ret = lzma_block_header_decode (&block, NULL, in_buf);
2319 [ - + ]: 102 : if (ret != LZMA_OK)
2320 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_block_header_decode");
2321 : 102 : ret = lzma_block_compressed_size (&block, iter->block.unpadded_size);
2322 [ - + ]: 102 : if (ret != LZMA_OK)
2323 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_block_compressed_size");
2324 : :
2325 : : // Start decoding the Block data.
2326 : 102 : lzma_stream strm = LZMA_STREAM_INIT;
2327 : 102 : ret = lzma_block_decoder (&strm, &block);
2328 [ - + ]: 102 : if (ret != LZMA_OK)
2329 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_block_decoder");
2330 : 102 : defer_dtor<lzma_stream*,void> strm_ender (&strm, lzma_end);
2331 : :
2332 : : // We might still have some input buffered from when we read the header.
2333 : 102 : strm.avail_in = header_read - block.header_size;
2334 : 102 : strm.next_in = in_buf + block.header_size;
2335 : 102 : strm.avail_out = sizeof (out_buf);
2336 : 102 : strm.next_out = out_buf;
2337 : 11518 : while (true)
2338 : : {
2339 [ + + ]: 11518 : if (strm.avail_in == 0)
2340 : : {
2341 [ + - ]: 14 : ssize_t bytes_read = read (src, in_buf, sizeof (in_buf));
2342 [ - + ]: 14 : if (bytes_read < 0)
2343 : : {
2344 [ # # ]: 0 : if (errno == EINTR)
2345 : 0 : continue;
2346 [ # # # # ]: 0 : throw libc_exception (errno, "read");
2347 : : }
2348 [ - + ]: 14 : if (bytes_read == 0)
2349 [ # # # # ]: 0 : throw reportable_exception ("truncated file");
2350 : 14 : strm.avail_in = bytes_read;
2351 : 14 : strm.next_in = in_buf;
2352 : : }
2353 : :
2354 : 11518 : ret = lzma_code (&strm, LZMA_RUN);
2355 [ - + ]: 11518 : if (ret != LZMA_OK && ret != LZMA_STREAM_END)
2356 [ # # # # ]: 0 : throw lzma_exception (ret, "lzma_code block");
2357 : :
2358 : : // Throw away anything we decode until we reach the offset, then
2359 : : // start writing to the destination.
2360 [ + + ]: 11518 : if (strm.total_out > offset)
2361 : : {
2362 : 2898 : size_t bytes_to_write = strm.next_out - out_buf;
2363 : 2898 : uint8_t* buf_to_write = out_buf;
2364 : :
2365 : : // Ignore anything in the buffer before the offset.
2366 [ + + ]: 2898 : if (bytes_to_write > strm.total_out - offset)
2367 : : {
2368 : 88 : buf_to_write += bytes_to_write - (strm.total_out - offset);
2369 : 88 : bytes_to_write = strm.total_out - offset;
2370 : : }
2371 : :
2372 : : // Ignore anything after the size.
2373 [ + + ]: 2898 : if (strm.total_out - offset >= size)
2374 : 94 : bytes_to_write -= strm.total_out - offset - size;
2375 : :
2376 [ + + ]: 2898 : if (need_log_extracting)
2377 : : {
2378 [ + - + - ]: 24 : obatched(clog) << "extracting from xz archive " << srcpath
2379 [ + - + - : 12 : << " size=" << size << endl;
+ - ]
2380 : 12 : need_log_extracting = false;
2381 : : }
2382 : :
2383 [ + + ]: 5796 : while (bytes_to_write > 0)
2384 : : {
2385 [ + - ]: 2898 : ssize_t written = write (dst, buf_to_write, bytes_to_write);
2386 [ - + ]: 2898 : if (written < 0)
2387 : : {
2388 [ # # ]: 0 : if (errno == EAGAIN)
2389 : 0 : continue;
2390 [ # # # # ]: 0 : throw libc_exception (errno, "write");
2391 : : }
2392 : 2898 : bytes_to_write -= written;
2393 : 2898 : buf_to_write += written;
2394 : : }
2395 : :
2396 : : // If we reached the size, we're done.
2397 [ + + ]: 2898 : if (strm.total_out - offset >= size)
2398 : 94 : return;
2399 : : }
2400 : :
2401 : 11424 : strm.avail_out = sizeof (out_buf);
2402 : 11424 : strm.next_out = out_buf;
2403 : :
2404 [ + + ]: 11424 : if (ret == LZMA_STREAM_END)
2405 : : break;
2406 : : }
2407 : :
2408 : : // This Block didn't have enough data. Go to the next one.
2409 [ - + ]: 8 : if (lzma_index_iter_next (iter, LZMA_INDEX_ITER_BLOCK))
2410 [ # # # # ]: 0 : throw reportable_exception ("no more blocks");
2411 [ + - ]: 8 : if (strm.total_out > offset)
2412 : 8 : size -= strm.total_out - offset;
2413 : 8 : offset = 0;
2414 : : // If we had any buffered input left, move it to the beginning of the
2415 : : // buffer to decode the next Block Header.
2416 [ + - ]: 8 : if (strm.avail_in > 0)
2417 : : {
2418 : 8 : memmove (in_buf, strm.next_in, strm.avail_in);
2419 : 8 : header_read = strm.avail_in;
2420 : : }
2421 : : else
2422 : : header_read = 0;
2423 : 8 : free_lzma_block_filter_options (&block);
2424 : 102 : }
2425 : 94 : }
2426 : :
2427 : : static int
2428 : 94 : extract_from_seekable_archive (const string& srcpath,
2429 : : char* tmppath,
2430 : : uint64_t offset,
2431 : : uint64_t size)
2432 : : {
2433 [ + - + - : 188 : inc_metric ("seekable_archive_extraction_attempts","type","xz");
+ - - + -
+ - - -
- ]
2434 : 94 : try
2435 : : {
2436 [ + - ]: 94 : int src = open (srcpath.c_str(), O_RDONLY);
2437 [ - + ]: 94 : if (src < 0)
2438 [ # # # # : 0 : throw libc_exception (errno, string("open ") + srcpath);
# # # # ]
2439 : 94 : defer_dtor<int,int> src_closer (src, close);
2440 : :
2441 [ + - ]: 94 : lzma_index* index = read_xz_index (src);
2442 : 94 : defer_dtor<lzma_index*,void> index_ender (index, my_lzma_index_end);
2443 : :
2444 : : // Find the Block containing the offset.
2445 : 94 : lzma_index_iter iter;
2446 : 94 : lzma_index_iter_init (&iter, index);
2447 [ - + ]: 94 : if (lzma_index_iter_locate (&iter, offset))
2448 [ # # # # ]: 0 : throw reportable_exception ("offset not found");
2449 : :
2450 [ + + ]: 94 : if (verbose > 3)
2451 [ + - + - ]: 36 : obatched(clog) << "seeking in xz archive " << srcpath
2452 [ + - + - : 12 : << " offset=" << offset << " block_offset="
+ - ]
2453 [ + - + - ]: 12 : << iter.block.uncompressed_file_offset << endl;
2454 : :
2455 [ + - ]: 94 : int dst = mkstemp (tmppath);
2456 [ - + ]: 94 : if (dst < 0)
2457 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
2458 : :
2459 : 94 : try
2460 : : {
2461 [ + - ]: 94 : extract_xz_blocks_into_fd (srcpath, src, dst, &iter, offset, size);
2462 : : }
2463 : 0 : catch (...)
2464 : : {
2465 : 0 : unlink (tmppath);
2466 [ - - ]: 0 : close (dst);
2467 : 0 : throw;
2468 : 0 : }
2469 : :
2470 [ + - + - : 188 : inc_metric ("seekable_archive_extraction_successes","type","xz");
+ - + - -
+ - + - -
- - ]
2471 : 94 : return dst;
2472 : 94 : }
2473 [ - - ]: 0 : catch (const reportable_exception &e)
2474 : : {
2475 [ - - - - : 0 : inc_metric ("seekable_archive_extraction_failures","type","xz");
- - - - -
- - - - -
- - ]
2476 [ - - ]: 0 : if (verbose)
2477 [ - - - - ]: 0 : obatched(clog) << "failed to extract from seekable xz archive "
2478 [ - - - - : 0 : << srcpath << ": " << e.message << endl;
- - ]
2479 : 0 : return -1;
2480 : 0 : }
2481 : : }
2482 : : #else
// Stub for the alternative branch of the enclosing preprocessor
// conditional: no archive is ever reported as seekable.
static bool
is_seekable_archive (const string& rps, struct archive* a)
{
  (void) rps; // unused in this configuration
  (void) a;   // unused in this configuration
  return false;
}
// Stub for the alternative branch of the enclosing preprocessor
// conditional: extraction always fails with -1, and no temporary
// file is created.
static int
extract_from_seekable_archive (const string& srcpath,
                               char* tmppath,
                               uint64_t offset,
                               uint64_t size)
{
  (void) srcpath; // all parameters are unused in this configuration
  (void) tmppath;
  (void) offset;
  (void) size;
  return -1;
}
2496 : : #endif
2497 : :
2498 : :
2499 : : // For security/portability reasons, many distro-package archives have
2500 : : // a "./" in front of path names; others have nothing, others have
2501 : : // "/". Canonicalize them all to a single leading "/", with the
2502 : : // assumption that this matches the dwarf-derived file names too.
2503 : 1314 : string canonicalized_archive_entry_pathname(struct archive_entry *e)
2504 : : {
2505 : 1314 : string fn = archive_entry_pathname(e);
2506 [ - + ]: 1314 : if (fn.size() == 0)
2507 : 0 : return fn;
2508 [ - + ]: 1314 : if (fn[0] == '/')
2509 : 0 : return fn;
2510 [ + + ]: 1314 : if (fn[0] == '.')
2511 [ + - ]: 1082 : return fn.substr(1);
2512 : : else
2513 [ + - + - : 464 : return string("/")+fn;
- - ]
2514 : 1314 : }
2515 : :
2516 : :
2517 : : // NB: takes ownership of, and may reassign, fd.
2518 : : static struct MHD_Response*
2519 : 1002 : create_buildid_r_response (int64_t b_mtime0,
2520 : : const string& b_source0,
2521 : : const string& b_source1,
2522 : : const string& section,
2523 : : const string& ima_sig,
2524 : : const char* tmppath,
2525 : : int& fd,
2526 : : off_t size,
2527 : : time_t mtime,
2528 : : const string& metric,
2529 : : const struct timespec& extract_begin)
2530 : : {
2531 [ + + ]: 1002 : if (tmppath != NULL)
2532 : : {
2533 : 230 : struct timespec extract_end;
2534 : 230 : clock_gettime (CLOCK_MONOTONIC, &extract_end);
2535 : 230 : double extract_time = (extract_end.tv_sec - extract_begin.tv_sec)
2536 : 230 : + (extract_end.tv_nsec - extract_begin.tv_nsec)/1.e9;
2537 [ + - ]: 460 : fdcache.intern(b_source0, b_source1, tmppath, size, true, extract_time);
2538 : : }
2539 : :
2540 [ + + ]: 1002 : if (!section.empty ())
2541 : : {
2542 [ + - + - ]: 8 : int scn_fd = extract_section (fd, b_mtime0,
2543 [ + - - + ]: 16 : b_source0 + ":" + b_source1,
2544 : : section, extract_begin);
2545 : 8 : close (fd);
2546 [ + + ]: 8 : if (scn_fd >= 0)
2547 : 4 : fd = scn_fd;
2548 : : else
2549 : : {
2550 [ + - ]: 4 : if (verbose)
2551 [ + - ]: 12 : obatched (clog) << "cannot find section " << section
2552 : : << " for archive " << b_source0
2553 [ + - + - : 4 : << " file " << b_source1 << endl;
+ - + - +
- ]
2554 : 4 : return 0;
2555 : : }
2556 : :
2557 : 4 : struct stat fs;
2558 [ - + ]: 4 : if (fstat (fd, &fs) < 0)
2559 : : {
2560 : 0 : close (fd);
2561 [ # # # # ]: 0 : throw libc_exception (errno,
2562 [ # # # # : 0 : string ("fstat ") + b_source0 + string (" ") + section);
# # # # #
# # # # #
# # # # #
# ]
2563 : : }
2564 : 4 : size = fs.st_size;
2565 : : }
2566 : :
2567 : 998 : struct MHD_Response* r = MHD_create_response_from_fd (size, fd);
2568 [ - + ]: 998 : if (r == 0)
2569 : : {
2570 [ # # ]: 0 : if (verbose)
2571 [ # # # # ]: 0 : obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
2572 : 0 : close(fd);
2573 : : }
2574 : : else
2575 : : {
2576 [ + - + - : 1996 : inc_metric ("http_responses_total","result",metric);
- + - - ]
2577 : 998 : add_mhd_response_header (r, "Content-Type", "application/octet-stream");
2578 [ + - ]: 998 : add_mhd_response_header (r, "X-DEBUGINFOD-SIZE", to_string(size).c_str());
2579 : 998 : add_mhd_response_header (r, "X-DEBUGINFOD-ARCHIVE", b_source0.c_str());
2580 : 998 : add_mhd_response_header (r, "X-DEBUGINFOD-FILE", b_source1.c_str());
2581 [ - + ]: 998 : if(!ima_sig.empty()) add_mhd_response_header(r, "X-DEBUGINFOD-IMASIGNATURE", ima_sig.c_str());
2582 : 998 : add_mhd_last_modified (r, mtime);
2583 [ - + ]: 998 : if (verbose > 1)
2584 [ + - ]: 2994 : obatched(clog) << "serving " << metric << " " << b_source0
2585 : : << " file " << b_source1
2586 : : << " section=" << section
2587 [ + - + - : 998 : << " IMA signature=" << ima_sig << endl;
+ - + - +
- + - + -
+ - + - ]
2588 : : /* libmicrohttpd will close fd. */
2589 : : }
2590 : : return r;
2591 : : }
2592 : :
2593 : : static struct MHD_Response*
2594 : 1058 : handle_buildid_r_match (bool internal_req_p,
2595 : : int64_t b_mtime,
2596 : : const string& b_source0,
2597 : : const string& b_source1,
2598 : : int64_t b_id0,
2599 : : int64_t b_id1,
2600 : : const string& section,
2601 : : int *result_fd)
2602 : : {
2603 : 1058 : struct timespec extract_begin;
2604 : 1058 : clock_gettime (CLOCK_MONOTONIC, &extract_begin);
2605 : :
2606 : 1058 : struct stat fs;
2607 : 1058 : int rc = stat (b_source0.c_str(), &fs);
2608 [ + + ]: 1058 : if (rc != 0)
2609 [ + - + - : 116 : throw libc_exception (errno, string("stat ") + b_source0);
+ - - + ]
2610 : :
2611 [ - + ]: 1000 : if ((int64_t) fs.st_mtime != b_mtime)
2612 : : {
2613 [ # # ]: 0 : if (verbose)
2614 [ # # # # ]: 0 : obatched(clog) << "mtime mismatch for " << b_source0 << endl;
2615 : 0 : return 0;
2616 : : }
2617 : :
2618 : : // Extract the IMA per-file signature (if it exists)
2619 : 1000 : string ima_sig = "";
2620 : : #ifdef ENABLE_IMA_VERIFICATION
2621 : : do
2622 : : {
2623 : : FD_t rpm_fd;
2624 : : if(!(rpm_fd = Fopen(b_source0.c_str(), "r.ufdio"))) // read, uncompressed, rpm/rpmio.h
2625 : : {
2626 : : if (verbose) obatched(clog) << "There was an error while opening " << b_source0 << endl;
2627 : : break; // Exit IMA extraction
2628 : : }
2629 : :
2630 : : Header rpm_hdr;
2631 : : if(RPMRC_FAIL == rpmReadPackageFile(NULL, rpm_fd, b_source0.c_str(), &rpm_hdr))
2632 : : {
2633 : : if (verbose) obatched(clog) << "There was an error while reading the header of " << b_source0 << endl;
2634 : : Fclose(rpm_fd);
2635 : : break; // Exit IMA extraction
2636 : : }
2637 : :
2638 : : // Fill sig_tag_data with an alloc'd copy of the array of IMA signatures (if they exist)
2639 : : struct rpmtd_s sig_tag_data;
2640 : : rpmtdReset(&sig_tag_data);
2641 : : do{ /* A do-while so we can break out of the koji sigcache checking on failure */
2642 : : if(requires_koji_sigcache_mapping)
2643 : : {
2644 : : /* NB: Koji builds result in a directory structure like the following
2645 : : - PACKAGE/VERSION/RELEASE
2646 : : - ARCH1
2647 : : - foo.rpm // The rpm known by debuginfod
2648 : : - ...
2649 : : - ARCHN
2650 : : - data
2651 : : - signed // Periodically purged (and not scanned by debuginfod)
2652 : : - sigcache
2653 : : - ARCH1
2654 : : - foo.rpm.sig // An empty rpm header
2655 : : - ...
2656 : : - ARCHN
2657 : : - PACKAGE_KEYID1
2658 : : - ARCH1
2659 : : - foo.rpm.sig // The header of the signed rpm. This is the file we need to extract the IMA signatures
2660 : : - ...
2661 : : - ARCHN
2662 : : - ...
2663 : : - PACKAGE_KEYIDn
2664 : :
2665 : : We therefore need to do a mapping:
2666 : :
2667 : : P/V/R/A/N-V-R.A.rpm ->
2668 : : P/V/R/data/sigcache/KEYID/A/N-V-R.A.rpm.sig
2669 : :
2670 : : There are 2 key insights here
2671 : :
2672 : : 1. We need to go 2 directories down from sigcache to get to the
2673 : : rpm header. So to distinguish ARCH1/foo.rpm.sig and
2674 : : PACKAGE_KEYID1/ARCH1/foo.rpm.sig we can look 2 directories down
2675 : :
2676 : : 2. It's safe to assume that the user will have all of the
2677 : : required verification certs. So we can pick from any of the
2678 : : PACKAGE_KEYID* directories. For simplicity we choose first we
2679 : : match against
2680 : :
2681 : : See: https://pagure.io/koji/issue/3670
2682 : : */
2683 : :
2684 : : // Do the mapping from b_source0 to the koji path for the signed rpm header
2685 : : string signed_rpm_path = b_source0;
2686 : : size_t insert_pos = string::npos;
2687 : : for(int i = 0; i < 2; i++) insert_pos = signed_rpm_path.rfind("/", insert_pos) - 1;
2688 : : string globbed_path = signed_rpm_path.insert(insert_pos + 1, "/data/sigcache/*").append(".sig"); // The globbed path we're seeking
2689 : : glob_t pglob;
2690 : : int grc;
2691 : : if(0 != (grc = glob(globbed_path.c_str(), GLOB_NOSORT, NULL, &pglob)))
2692 : : {
2693 : : // Break out, but only report real errors
2694 : : if (verbose && grc != GLOB_NOMATCH) obatched(clog) << "There was an error (" << strerror(errno) << ") globbing " << globbed_path << endl;
2695 : : break; // Exit koji sigcache check
2696 : : }
2697 : : signed_rpm_path = pglob.gl_pathv[0]; // See insight 2 above
2698 : : globfree(&pglob);
2699 : :
2700 : : if (verbose > 2) obatched(clog) << "attempting IMA signature extraction from koji header " << signed_rpm_path << endl;
2701 : :
2702 : : FD_t sig_rpm_fd;
2703 : : if(NULL == (sig_rpm_fd = Fopen(signed_rpm_path.c_str(), "r")))
2704 : : {
2705 : : if (verbose) obatched(clog) << "There was an error while opening " << signed_rpm_path << endl;
2706 : : break; // Exit koji sigcache check
2707 : : }
2708 : :
2709 : : Header sig_hdr = headerRead(sig_rpm_fd, HEADER_MAGIC_YES /* Validate magic too */ );
2710 : : if (!sig_hdr || 1 != headerGet(sig_hdr, RPMSIGTAG_FILESIGNATURES, &sig_tag_data, HEADERGET_ALLOC))
2711 : : {
2712 : : if (verbose) obatched(clog) << "Unable to extract RPMSIGTAG_FILESIGNATURES from " << signed_rpm_path << endl;
2713 : : }
2714 : : headerFree(sig_hdr); // We can free here since sig_tag_data has an alloc'd copy of the data
2715 : : Fclose(sig_rpm_fd);
2716 : : }
2717 : : }while(false);
2718 : :
2719 : : if(0 == sig_tag_data.count)
2720 : : {
2721 : : // In the general case (or a fallback from the koji sigcache mapping not finding signatures)
2722 : : // we can just (try) extract the signatures from the rpm header
2723 : : if (1 != headerGet(rpm_hdr, RPMTAG_FILESIGNATURES, &sig_tag_data, HEADERGET_ALLOC))
2724 : : {
2725 : : if (verbose) obatched(clog) << "Unable to extract RPMTAG_FILESIGNATURES from " << b_source0 << endl;
2726 : : }
2727 : : }
2728 : : // Search the array for the signature coresponding to b_source1
2729 : : int idx = -1;
2730 : : char *sig = NULL;
2731 : : rpmfi hdr_fi = rpmfiNew(NULL, rpm_hdr, RPMTAG_BASENAMES, RPMFI_FLAGS_QUERY);
2732 : : do
2733 : : {
2734 : : sig = (char*)rpmtdNextString(&sig_tag_data);
2735 : : idx = rpmfiNext(hdr_fi);
2736 : : }
2737 : : while (idx != -1 && 0 != strcmp(b_source1.c_str(), rpmfiFN(hdr_fi)));
2738 : : rpmfiFree(hdr_fi);
2739 : :
2740 : : if(sig && 0 != strlen(sig) && idx != -1)
2741 : : {
2742 : : if (verbose > 2) obatched(clog) << "Found IMA signature for " << b_source1 << ":\n" << sig << endl;
2743 : : ima_sig = sig;
2744 : : inc_metric("http_responses_total","extra","ima-sigs-extracted");
2745 : : }
2746 : : else
2747 : : {
2748 : : if (verbose > 2) obatched(clog) << "Could not find IMA signature for " << b_source1 << endl;
2749 : : }
2750 : :
2751 : : rpmtdFreeData (&sig_tag_data);
2752 : : headerFree(rpm_hdr);
2753 : : Fclose(rpm_fd);
2754 : : } while(false);
2755 : : #endif
2756 : :
2757 : : // check for a match in the fdcache first
2758 [ + - ]: 1000 : int fd = fdcache.lookup(b_source0, b_source1);
2759 [ + + ]: 1000 : while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
2760 : : {
2761 : 772 : rc = fstat(fd, &fs);
2762 [ - + ]: 772 : if (rc < 0) // disappeared?
2763 : : {
2764 [ # # ]: 0 : if (verbose)
2765 [ # # # # : 0 : obatched(clog) << "cannot fstat fdcache " << b_source0 << endl;
# # ]
2766 [ # # ]: 0 : close(fd);
2767 [ # # ]: 0 : fdcache.clear(b_source0, b_source1);
2768 : : break; // branch out of if "loop", to try new libarchive fetch attempt
2769 : : }
2770 : :
2771 [ + - + - : 772 : struct MHD_Response* r = create_buildid_r_response (b_mtime, b_source0,
- + - - ]
2772 : : b_source1, section,
2773 : : ima_sig, NULL, fd,
2774 : : fs.st_size,
2775 : : fs.st_mtime,
2776 : : "archive fdcache",
2777 : : extract_begin);
2778 [ + + ]: 772 : if (r == 0)
2779 : : break; // branch out of if "loop", to try new libarchive fetch attempt
2780 [ + - ]: 770 : if (result_fd)
2781 : 770 : *result_fd = fd;
2782 : : return r;
2783 : : // NB: see, we never go around the 'loop' more than once
2784 : : }
2785 : :
2786 : : // no match ... look for a seekable entry
2787 : 230 : bool populate_seekable = ! passive_p;
2788 : 230 : unique_ptr<sqlite_ps> pp (new sqlite_ps (internal_req_p ? db : dbq,
2789 : : "rpm-seekable-query",
2790 : : "select type, size, offset, mtime from " BUILDIDS "_r_seekable "
2791 [ + - + - : 460 : "where file = ? and content = ?"));
+ - + + +
- + - + -
- - ]
2792 [ + - + - : 230 : rc = pp->reset().bind(1, b_id0).bind(2, b_id1).step();
+ - + - ]
2793 [ + + ]: 230 : if (rc != SQLITE_DONE)
2794 : : {
2795 [ - + ]: 94 : if (rc != SQLITE_ROW)
2796 [ # # # # ]: 0 : throw sqlite_exception(rc, "step");
2797 : : // if we found a match in _r_seekable but we fail to extract it, don't
2798 : : // bother populating it again
2799 : 94 : populate_seekable = false;
2800 [ + - ]: 94 : const char* seekable_type = (const char*) sqlite3_column_text (*pp, 0);
2801 [ + - - + ]: 94 : if (seekable_type != NULL && strcmp (seekable_type, "xz") == 0)
2802 : : {
2803 [ + - ]: 94 : int64_t seekable_size = sqlite3_column_int64 (*pp, 1);
2804 [ + - ]: 94 : int64_t seekable_offset = sqlite3_column_int64 (*pp, 2);
2805 [ + - ]: 94 : int64_t seekable_mtime = sqlite3_column_int64 (*pp, 3);
2806 : :
2807 : 94 : char* tmppath = NULL;
2808 [ - + ]: 94 : if (asprintf (&tmppath, "%s/debuginfod-fdcache.XXXXXX", tmpdir.c_str()) < 0)
2809 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
2810 : 94 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
2811 : :
2812 [ + - ]: 94 : fd = extract_from_seekable_archive (b_source0, tmppath,
2813 : : seekable_offset, seekable_size);
2814 [ + - ]: 94 : if (fd >= 0)
2815 : : {
2816 : : // Set the mtime so the fdcache file mtimes propagate to future webapi
2817 : : // clients.
2818 : 94 : struct timespec tvs[2];
2819 : 94 : tvs[0].tv_sec = 0;
2820 : 94 : tvs[0].tv_nsec = UTIME_OMIT;
2821 : 94 : tvs[1].tv_sec = seekable_mtime;
2822 : 94 : tvs[1].tv_nsec = 0;
2823 : 94 : (void) futimens (fd, tvs); /* best effort */
2824 [ + - + - : 94 : struct MHD_Response* r = create_buildid_r_response (b_mtime,
+ - ]
2825 : : b_source0,
2826 : : b_source1,
2827 : : section,
2828 : : ima_sig,
2829 : : tmppath, fd,
2830 : : seekable_size,
2831 : : seekable_mtime,
2832 : : "seekable xz archive",
2833 : : extract_begin);
2834 [ + - ]: 94 : if (r != 0 && result_fd)
2835 : 94 : *result_fd = fd;
2836 : 94 : return r;
2837 : : }
2838 : 94 : }
2839 : : }
2840 : 136 : pp.reset();
2841 : :
2842 : : // still no match ... grumble, must process the archive
2843 [ + - ]: 136 : string archive_decoder = "/dev/null";
2844 [ + - - - ]: 136 : string archive_extension = "";
2845 [ + + ]: 310 : for (auto&& arch : scan_archives)
2846 [ + + ]: 174 : if (string_endswith(b_source0, arch.first))
2847 : : {
2848 [ + - ]: 136 : archive_extension = arch.first;
2849 [ + - ]: 310 : archive_decoder = arch.second;
2850 : : }
2851 : 136 : FILE* fp;
2852 : :
2853 : 136 : defer_dtor<FILE*,int>::dtor_fn dfn;
2854 [ + + ]: 136 : if (archive_decoder != "cat")
2855 : : {
2856 [ + - + - : 48 : string popen_cmd = archive_decoder + " " + shell_escape(b_source0);
+ - - + -
- - - ]
2857 [ + - ]: 24 : fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
2858 : 24 : dfn = pclose;
2859 [ - + ]: 24 : if (fp == NULL)
2860 [ # # # # : 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
# # # # ]
2861 : 24 : }
2862 : : else
2863 : : {
2864 [ + - ]: 112 : fp = fopen (b_source0.c_str(), "r");
2865 : 112 : dfn = fclose;
2866 [ - + ]: 112 : if (fp == NULL)
2867 [ # # # # : 0 : throw libc_exception (errno, string("fopen ") + b_source0);
# # # # ]
2868 : : }
2869 : 136 : defer_dtor<FILE*,int> fp_closer (fp, dfn);
2870 : :
2871 : 136 : struct archive *a;
2872 [ + - ]: 136 : a = archive_read_new();
2873 [ - + ]: 136 : if (a == NULL)
2874 [ # # # # ]: 0 : throw archive_exception("cannot create archive reader");
2875 : 136 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
2876 : :
2877 [ + - ]: 136 : rc = archive_read_support_format_all(a);
2878 [ - + ]: 136 : if (rc != ARCHIVE_OK)
2879 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all format");
2880 [ + - ]: 136 : rc = archive_read_support_filter_all(a);
2881 [ - + ]: 136 : if (rc != ARCHIVE_OK)
2882 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all filters");
2883 : :
2884 [ + - ]: 136 : rc = archive_read_open_FILE (a, fp);
2885 [ - + ]: 136 : if (rc != ARCHIVE_OK)
2886 : : {
2887 [ # # # # : 0 : obatched(clog) << "cannot open archive from pipe " << b_source0 << endl;
# # ]
2888 [ # # # # ]: 0 : throw archive_exception(a, "cannot open archive from pipe");
2889 : : }
2890 : :
2891 : : // If the archive was scanned in a version without _r_seekable, then we may
2892 : : // need to populate _r_seekable now. This can be removed the next time
2893 : : // BUILDIDS is updated.
2894 [ + + ]: 136 : if (populate_seekable)
2895 : : {
2896 [ + - ]: 134 : populate_seekable = is_seekable_archive (b_source0, a);
2897 [ + - ]: 134 : if (populate_seekable)
2898 : : {
2899 : : // NB: the names are already interned
2900 [ # # ]: 0 : pp.reset(new sqlite_ps (db, "rpm-seekable-insert2",
2901 : : "insert or ignore into " BUILDIDS "_r_seekable (file, content, type, size, offset, mtime) "
2902 : : "values (?, "
2903 : : "(select id from " BUILDIDS "_files "
2904 : : "where dirname = (select id from " BUILDIDS "_fileparts where name = ?) "
2905 : : "and basename = (select id from " BUILDIDS "_fileparts where name = ?) "
2906 [ # # # # : 0 : "), 'xz', ?, ?, ?)"));
# # # # #
# # # ]
2907 : : }
2908 : : }
2909 : :
2910 : : // archive traversal is in five stages:
2911 : : // 1) before we find a matching entry, insert it into _r_seekable if needed or
2912 : : // skip it otherwise
2913 : : // 2) extract the matching entry (set r = result). Also insert it into
2914 : : // _r_seekable if needed
2915 : : // 3) extract some number of prefetched entries (just into fdcache). Also
2916 : : // insert them into _r_seekable if needed
2917 : : // 4) if needed, insert all of the remaining entries into _r_seekable
2918 : : // 5) abort any further processing
2919 : 136 : struct MHD_Response* r = 0; // will set in stage 2
2920 [ + + ]: 136 : unsigned prefetch_count =
2921 : : internal_req_p ? 0 : fdcache_prefetch; // will decrement in stage 3
2922 : :
2923 [ + + - + ]: 1446 : while(r == 0 || prefetch_count > 0 || populate_seekable) // stage 1-4
2924 : : {
2925 [ + - ]: 1426 : if (interrupted)
2926 : : break;
2927 : :
2928 : 1426 : struct archive_entry *e;
2929 [ + - ]: 1426 : rc = archive_read_next_header (a, &e);
2930 [ + + ]: 1426 : if (rc != ARCHIVE_OK)
2931 : : break;
2932 : :
2933 [ + - + + ]: 1312 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
2934 : 1038 : continue;
2935 : :
2936 [ + - ]: 274 : string fn = canonicalized_archive_entry_pathname (e);
2937 : :
2938 [ - + ]: 274 : if (populate_seekable)
2939 : : {
2940 : 0 : string dn, bn;
2941 : 0 : size_t slash = fn.rfind('/');
2942 [ # # ]: 0 : if (slash == std::string::npos) {
2943 [ # # ]: 0 : dn = "";
2944 [ # # ]: 0 : bn = fn;
2945 : : } else {
2946 [ # # # # ]: 0 : dn = fn.substr(0, slash);
2947 [ # # # # ]: 0 : bn = fn.substr(slash + 1);
2948 : : }
2949 : :
2950 [ # # ]: 0 : int64_t seekable_size = archive_entry_size (e);
2951 [ # # ]: 0 : int64_t seekable_offset = archive_filter_bytes (a, 0);
2952 [ # # ]: 0 : time_t seekable_mtime = archive_entry_mtime (e);
2953 : :
2954 [ # # ]: 0 : pp->reset();
2955 [ # # ]: 0 : pp->bind(1, b_id0);
2956 [ # # ]: 0 : pp->bind(2, dn);
2957 [ # # ]: 0 : pp->bind(3, bn);
2958 [ # # ]: 0 : pp->bind(4, seekable_size);
2959 [ # # ]: 0 : pp->bind(5, seekable_offset);
2960 [ # # ]: 0 : pp->bind(6, seekable_mtime);
2961 [ # # ]: 0 : rc = pp->step();
2962 [ # # ]: 0 : if (rc != SQLITE_DONE)
2963 [ # # # # ]: 0 : obatched(clog) << "recording seekable file=" << fn
2964 [ # # # # : 0 : << " sqlite3 error: " << (sqlite3_errstr(rc) ?: "?") << endl;
# # # # #
# ]
2965 [ # # ]: 0 : else if (verbose > 2)
2966 [ # # # # : 0 : obatched(clog) << "recorded seekable file=" << fn
# # ]
2967 [ # # # # ]: 0 : << " size=" << seekable_size
2968 [ # # # # ]: 0 : << " offset=" << seekable_offset
2969 [ # # # # : 0 : << " mtime=" << seekable_mtime << endl;
# # ]
2970 [ # # ]: 0 : if (r != 0 && prefetch_count == 0) // stage 4
2971 [ # # ]: 0 : continue;
2972 [ # # # # ]: 0 : }
2973 : :
2974 [ + + + + ]: 274 : if ((r == 0) && (fn != b_source1)) // stage 1
2975 : 68 : continue;
2976 : :
2977 [ + - + + ]: 206 : if (fdcache.probe (b_source0, fn) && // skip if already interned
2978 [ + + ]: 26 : fn != b_source1) // but only if we'd just be prefetching, PR29474
2979 : 24 : continue;
2980 : :
2981 : : // extract this file to a temporary file
2982 : 182 : char* tmppath = NULL;
2983 : 182 : rc = asprintf (&tmppath, "%s/debuginfod-fdcache.XXXXXX", tmpdir.c_str());
2984 [ - + ]: 182 : if (rc < 0)
2985 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
2986 : 182 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
2987 [ + - ]: 182 : fd = mkstemp (tmppath);
2988 [ - + ]: 182 : if (fd < 0)
2989 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
2990 : : // NB: don't unlink (tmppath), as fdcache will take charge of it.
2991 : :
2992 : : // NB: this can take many uninterruptible seconds for a huge file
2993 [ + - ]: 182 : rc = archive_read_data_into_fd (a, fd);
2994 [ - + ]: 182 : if (rc != ARCHIVE_OK) // e.g. ENOSPC!
2995 : : {
2996 [ # # ]: 0 : close (fd);
2997 : 0 : unlink (tmppath);
2998 [ # # # # ]: 0 : throw archive_exception(a, "cannot extract file");
2999 : : }
3000 : :
3001 : : // Set the mtime so the fdcache file mtimes, even prefetched ones,
3002 : : // propagate to future webapi clients.
3003 : 182 : struct timespec tvs[2];
3004 : 182 : tvs[0].tv_sec = 0;
3005 : 182 : tvs[0].tv_nsec = UTIME_OMIT;
3006 [ + - ]: 182 : tvs[1].tv_sec = archive_entry_mtime(e);
3007 [ + - ]: 182 : tvs[1].tv_nsec = archive_entry_mtime_nsec(e);
3008 : 182 : (void) futimens (fd, tvs); /* best effort */
3009 : :
3010 [ + + ]: 182 : if (r != 0) // stage 3
3011 : : {
3012 : 46 : struct timespec extract_end;
3013 : 46 : clock_gettime (CLOCK_MONOTONIC, &extract_end);
3014 : 46 : double extract_time = (extract_end.tv_sec - extract_begin.tv_sec)
3015 : 46 : + (extract_end.tv_nsec - extract_begin.tv_nsec)/1.e9;
3016 : : // NB: now we know we have a complete reusable file; make fdcache
3017 : : // responsible for unlinking it later.
3018 [ + - + - : 46 : fdcache.intern(b_source0, fn,
+ - ]
3019 : : tmppath, archive_entry_size(e),
3020 : : false, extract_time); // prefetched ones go to the prefetch cache
3021 : 46 : prefetch_count --;
3022 [ + - ]: 46 : close (fd); // we're not saving this fd to make a mhd-response from!
3023 : 46 : continue;
3024 : 46 : }
3025 : :
3026 [ + - + - : 136 : r = create_buildid_r_response (b_mtime, b_source0, b_source1, section,
+ - + + ]
3027 : : ima_sig, tmppath, fd,
3028 : : archive_entry_size(e),
3029 : : archive_entry_mtime(e),
3030 [ + - ]: 136 : archive_extension + " archive",
3031 : : extract_begin);
3032 [ + + ]: 136 : if (r == 0)
3033 : : break; // assume no chance of better luck around another iteration; no other copies of same file
3034 [ + - ]: 134 : if (result_fd)
3035 : 134 : *result_fd = fd;
3036 [ + - + - : 1700 : }
+ + ]
3037 : :
3038 : : // XXX: rpm/file not found: delete this R entry?
3039 : 136 : return r;
3040 [ - + + + ]: 1366 : }
3041 : :
3042 : : void
3043 : 646 : add_client_federation_headers(debuginfod_client *client, MHD_Connection* conn){
3044 : : // Transcribe incoming User-Agent:
3045 [ - + ]: 646 : string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
3046 [ + - + - : 650 : string ua_complete = string("User-Agent: ") + ua;
+ - ]
3047 [ + - ]: 646 : debuginfod_add_http_header (client, ua_complete.c_str());
3048 : :
3049 : : // Compute larger XFF:, for avoiding info loss during
3050 : : // federation, and for future cyclicity detection.
3051 [ + - + + : 1270 : string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
+ - + - ]
3052 [ + + ]: 646 : if (xff != "")
3053 [ + - - + ]: 52 : xff += string(", "); // comma separated list
3054 : :
3055 : 646 : unsigned int xff_count = 0;
3056 [ + + ]: 974 : for (auto&& i : xff){
3057 [ + + ]: 328 : if (i == ',') xff_count++;
3058 : : }
3059 : :
3060 : : // if X-Forwarded-For: exceeds N hops,
3061 : : // do not delegate a local lookup miss to upstream debuginfods.
3062 [ + + ]: 646 : if (xff_count >= forwarded_ttl_limit)
3063 [ + - + - ]: 8 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
3064 : 8 : and will not query the upstream servers");
3065 : :
3066 : : // Compute the client's numeric IP address only - so can't merge with conninfo()
3067 [ + - ]: 642 : const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
3068 : : MHD_CONNECTION_INFO_CLIENT_ADDRESS);
3069 [ + - ]: 642 : struct sockaddr *so = u ? u->client_addr : 0;
3070 : 642 : char hostname[256] = ""; // RFC1035
3071 [ + - - + ]: 642 : if (so && so->sa_family == AF_INET) {
3072 [ # # ]: 0 : (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
3073 : : NI_NUMERICHOST);
3074 [ + - ]: 642 : } else if (so && so->sa_family == AF_INET6) {
3075 : 642 : struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
3076 [ + - + - : 642 : if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
- + ]
3077 : 642 : struct sockaddr_in addr4;
3078 [ + - ]: 642 : memset (&addr4, 0, sizeof(addr4));
3079 : 642 : addr4.sin_family = AF_INET;
3080 : 642 : addr4.sin_port = addr6->sin6_port;
3081 [ + - ]: 642 : memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
3082 [ + - ]: 642 : (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
3083 : : hostname, sizeof (hostname), NULL, 0,
3084 : : NI_NUMERICHOST);
3085 : : } else {
3086 [ # # ]: 0 : (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
3087 : : NI_NUMERICHOST);
3088 : : }
3089 : : }
3090 : :
3091 [ + - + - : 1288 : string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
+ - + - -
+ - + - -
- + ]
3092 [ + - ]: 642 : debuginfod_add_http_header (client, xff_complete.c_str());
3093 [ + + + - : 1360 : }
+ + ]
3094 : :
3095 : : static struct MHD_Response*
3096 : 2184 : handle_buildid_match (bool internal_req_p,
3097 : : int64_t b_mtime,
3098 : : const string& b_stype,
3099 : : const string& b_source0,
3100 : : const string& b_source1,
3101 : : int64_t b_id0,
3102 : : int64_t b_id1,
3103 : : const string& section,
3104 : : int *result_fd)
3105 : : {
3106 : 2184 : try
3107 : : {
3108 [ + + ]: 2184 : if (b_stype == "F")
3109 [ + - ]: 1126 : return handle_buildid_f_match(internal_req_p, b_mtime, b_source0,
3110 : : section, result_fd);
3111 [ + - ]: 1058 : else if (b_stype == "R")
3112 [ + + ]: 1058 : return handle_buildid_r_match(internal_req_p, b_mtime, b_source0,
3113 : : b_source1, b_id0, b_id1, section,
3114 : : result_fd);
3115 : : }
3116 [ - + ]: 58 : catch (const reportable_exception &e)
3117 : : {
3118 [ + - ]: 58 : e.report(clog);
3119 : : // Report but swallow libc etc. errors here; let the caller
3120 : : // iterate to other matches of the content.
3121 : 58 : }
3122 : :
3123 : : return 0;
3124 : : }
3125 : :
3126 : :
3127 : : static int
3128 : 4 : debuginfod_find_progress (debuginfod_client *, long a, long b)
3129 : : {
3130 [ - + ]: 4 : if (verbose > 4)
3131 [ # # # # : 0 : obatched(clog) << "federated debuginfod progress=" << a << "/" << b << endl;
# # # # ]
3132 : :
3133 : 4 : return interrupted;
3134 : : }
3135 : :
3136 : :
3137 : : // A small pool of debuginfod_client*s kept for reuse between query threads.
// NB: reuse order is most-recently-returned first: debuginfod_pool_end
// pushes at the front and debuginfod_pool_begin pops from the front.
3138 : :
3139 : : mutex dc_pool_lock; // taken by each debuginfod_pool_* function before touching dc_pool
3140 : : deque<debuginfod_client*> dc_pool; // idle clients available for reuse
3141 : :
3142 : 666 : debuginfod_client* debuginfod_pool_begin()
3143 : : {
3144 : 666 : unique_lock<mutex> lock(dc_pool_lock);
3145 [ + + ]: 666 : if (dc_pool.size() > 0)
3146 : : {
3147 [ + - + - : 1268 : inc_metric("dc_pool_op_count","op","begin-reuse");
+ - + - -
+ - + - -
- - ]
3148 : 634 : debuginfod_client *c = dc_pool.front();
3149 : 634 : dc_pool.pop_front();
3150 : 634 : return c;
3151 : : }
3152 [ + - + - : 64 : inc_metric("dc_pool_op_count","op","begin-new");
+ - + - -
+ - + - -
- - ]
3153 [ + - ]: 32 : return debuginfod_begin();
3154 : 666 : }
3155 : :
3156 : :
3157 : 158 : void debuginfod_pool_groom()
3158 : : {
3159 : 158 : unique_lock<mutex> lock(dc_pool_lock);
3160 [ + + ]: 190 : while (dc_pool.size() > 0)
3161 : : {
3162 [ + - + - : 64 : inc_metric("dc_pool_op_count","op","end");
+ - + - -
+ - + - -
- - ]
3163 [ + - ]: 32 : debuginfod_end(dc_pool.front());
3164 : 32 : dc_pool.pop_front();
3165 : : }
3166 : 158 : }
3167 : :
3168 : :
3169 : 666 : void debuginfod_pool_end(debuginfod_client* c)
3170 : : {
3171 : 666 : unique_lock<mutex> lock(dc_pool_lock);
3172 [ + - + - : 1332 : inc_metric("dc_pool_op_count","op","end-save");
+ - + - -
+ - + - -
- - ]
3173 [ + - ]: 666 : dc_pool.push_front(c); // accelerate reuse, vs. push_back
3174 : 666 : }
3175 : :
3176 : :
3177 : : static struct MHD_Response*
3178 : 2772 : handle_buildid (MHD_Connection* conn,
3179 : : const string& buildid /* unsafe */,
3180 : : string& artifacttype /* unsafe, cleanse on exception/return */,
3181 : : const string& suffix /* unsafe */,
3182 : : int *result_fd)
3183 : : {
3184 : : // validate artifacttype
3185 : 2772 : string atype_code;
3186 [ + + + - ]: 2772 : if (artifacttype == "debuginfo") atype_code = "D";
3187 [ + + + - ]: 1806 : else if (artifacttype == "executable") atype_code = "E";
3188 [ + + + - ]: 1114 : else if (artifacttype == "source") atype_code = "S";
3189 [ + + + - ]: 12 : else if (artifacttype == "section") atype_code = "I";
3190 : : else {
3191 [ + - ]: 4 : artifacttype = "invalid"; // PR28242 ensure http_resposes metrics don't propagate unclean user data
3192 [ + - + - ]: 12 : throw reportable_exception("invalid artifacttype");
3193 : : }
3194 : :
3195 [ + + ]: 2768 : if (conn != 0)
3196 [ + - + - : 5782 : inc_metric("http_requests_total", "type", artifacttype);
+ - - + -
- - + ]
3197 : :
3198 : 2768 : string section;
3199 [ + + ]: 2768 : if (atype_code == "I")
3200 : : {
3201 [ - + ]: 8 : if (suffix.size () < 2)
3202 [ # # # # ]: 0 : throw reportable_exception ("invalid section suffix");
3203 : :
3204 : : // Remove leading '/'
3205 [ + - - + ]: 8 : section = suffix.substr(1);
3206 : : }
3207 : :
3208 [ + + - + ]: 3870 : if (atype_code == "S" && suffix == "")
3209 [ # # # # ]: 0 : throw reportable_exception("invalid source suffix");
3210 : :
3211 : : // validate buildid
3212 [ + + ]: 2768 : if ((buildid.size() < 2) || // not empty
3213 [ + + + - : 5530 : (buildid.size() % 2) || // even number
+ - ]
3214 : 2762 : (buildid.find_first_not_of("0123456789abcdef") != string::npos)) // pure tasty lowercase hex
3215 [ + - - + ]: 12 : throw reportable_exception("invalid buildid");
3216 : :
3217 [ + - ]: 2762 : if (verbose > 1)
3218 [ + - + - ]: 8286 : obatched(clog) << "searching for buildid=" << buildid << " artifacttype=" << artifacttype
3219 [ + - + - : 2762 : << " suffix=" << suffix << endl;
+ - + - +
- ]
3220 : :
3221 : : // If invoked from the scanner threads, use the scanners' read-write
3222 : : // connection. Otherwise use the web query threads' read-only connection.
3223 [ + + ]: 2762 : sqlite3 *thisdb = (conn == 0) ? db : dbq;
3224 : :
3225 : 2762 : sqlite_ps *pp = 0;
3226 : :
3227 [ + + ]: 2762 : if (atype_code == "D")
3228 : : {
3229 [ - + ]: 966 : pp = new sqlite_ps (thisdb, "mhd-query-d",
3230 : : "select mtime, sourcetype, source0, source1, id0, id1 from " BUILDIDS "_query_d2 where buildid = ? "
3231 [ + - + - : 1932 : "order by mtime desc");
+ - + - +
- - - ]
3232 [ + - ]: 966 : pp->reset();
3233 [ + - ]: 966 : pp->bind(1, buildid);
3234 : : }
3235 [ + + ]: 1796 : else if (atype_code == "E")
3236 : : {
3237 [ - + ]: 686 : pp = new sqlite_ps (thisdb, "mhd-query-e",
3238 : : "select mtime, sourcetype, source0, source1, id0, id1 from " BUILDIDS "_query_e2 where buildid = ? "
3239 [ + - + - : 1372 : "order by mtime desc");
+ - + - +
- - - ]
3240 [ + - ]: 686 : pp->reset();
3241 [ + - ]: 686 : pp->bind(1, buildid);
3242 : : }
3243 [ + + ]: 1110 : else if (atype_code == "S")
3244 : : {
3245 : : // PR25548
3246 : : // Incoming source queries may come in with either dwarf-level OR canonicalized paths.
3247 : : // We let the query pass with either one.
3248 : :
3249 [ - + ]: 1102 : pp = new sqlite_ps (thisdb, "mhd-query-s",
3250 : : "select mtime, sourcetype, source0, source1 from " BUILDIDS "_query_s where buildid = ? and artifactsrc in (?,?) "
3251 [ + - + - : 2204 : "order by sharedprefix(source0,source0ref) desc, mtime desc");
+ - + - +
- - - ]
3252 [ + - ]: 1102 : pp->reset();
3253 [ + - ]: 1102 : pp->bind(1, buildid);
3254 : : // NB: we don't store the non-canonicalized path names any more, but old databases
3255 : : // might have them (and no canon ones), so we keep searching for both.
3256 [ + - ]: 1102 : pp->bind(2, suffix);
3257 [ + - + - : 2846 : pp->bind(3, canon_pathname(suffix));
- + ]
3258 : : }
3259 [ + - ]: 8 : else if (atype_code == "I")
3260 : : {
3261 [ - + ]: 8 : pp = new sqlite_ps (thisdb, "mhd-query-i",
3262 : : "select mtime, sourcetype, source0, source1, 1 as debug_p from " BUILDIDS "_query_d2 where buildid = ? "
3263 : : "union all "
3264 : : "select mtime, sourcetype, source0, source1, 0 as debug_p from " BUILDIDS "_query_e2 where buildid = ? "
3265 [ + - + - : 16 : "order by debug_p desc, mtime desc");
+ - + - +
- - - ]
3266 [ + - ]: 8 : pp->reset();
3267 [ + - ]: 8 : pp->bind(1, buildid);
3268 [ + - ]: 8 : pp->bind(2, buildid);
3269 : : }
3270 : 2762 : unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
3271 : :
3272 : 2762 : bool do_upstream_section_query = true;
3273 : :
3274 : : // consume all the rows
3275 : 2824 : while (1)
3276 : : {
3277 [ + - ]: 2824 : int rc = pp->step();
3278 [ + + ]: 2824 : if (rc == SQLITE_DONE) break;
3279 [ - + ]: 2184 : if (rc != SQLITE_ROW)
3280 [ # # # # ]: 0 : throw sqlite_exception(rc, "step");
3281 : :
3282 [ + - ]: 2184 : int64_t b_mtime = sqlite3_column_int64 (*pp, 0);
3283 [ + - - + : 2184 : string b_stype = string((const char*) sqlite3_column_text (*pp, 1) ?: ""); /* by DDL may not be NULL */
+ - ]
3284 [ + - - + : 2184 : string b_source0 = string((const char*) sqlite3_column_text (*pp, 2) ?: ""); /* may be NULL */
+ - - - ]
3285 [ + - + + : 3310 : string b_source1 = string((const char*) sqlite3_column_text (*pp, 3) ?: ""); /* may be NULL */
+ - - - ]
3286 : 2184 : int64_t b_id0 = 0, b_id1 = 0;
3287 [ + + + + ]: 3452 : if (atype_code == "D" || atype_code == "E")
3288 : : {
3289 [ + - ]: 1068 : b_id0 = sqlite3_column_int64 (*pp, 4);
3290 [ + - ]: 1068 : b_id1 = sqlite3_column_int64 (*pp, 5);
3291 : : }
3292 : :
3293 [ + - ]: 2184 : if (verbose > 1)
3294 [ + - + - : 6552 : obatched(clog) << "found mtime=" << b_mtime << " stype=" << b_stype
- - ]
3295 [ + - + - : 2184 : << " source0=" << b_source0 << " source1=" << b_source1 << endl;
+ - + - +
- + - +
- ]
3296 : :
3297 : : // Try accessing the located match.
3298 : : // XXX: in case of multiple matches, attempt them in parallel?
3299 [ + - ]: 2184 : auto r = handle_buildid_match (conn ? false : true,
3300 : : b_mtime, b_stype, b_source0, b_source1,
3301 : : b_id0, b_id1, section, result_fd);
3302 [ + + ]: 2184 : if (r)
3303 [ + + ]: 2122 : return r;
3304 : :
3305 : : // If a debuginfo file matching BUILDID was found but didn't contain
3306 : : // the desired section, then the section should not exist. Don't
3307 : : // bother querying upstream servers.
3308 [ + + + - : 62 : if (!section.empty () && (sqlite3_column_int (*pp, 4) == 1))
- + ]
3309 : : {
3310 : 4 : struct stat st;
3311 : :
3312 : : // For "F" sourcetype, check if the debuginfo exists. For "R"
3313 : : // sourcetype, check if the debuginfo was interned into the fdcache.
3314 [ - + ]: 6 : if ((b_stype == "F" && (stat (b_source0.c_str (), &st) == 0))
3315 [ + + + - : 6 : || (b_stype == "R" && fdcache.probe (b_source0, b_source1)))
+ - - + ]
3316 : : do_upstream_section_query = false;
3317 : : }
3318 [ + - - + : 4368 : }
+ - - + ]
3319 [ + - ]: 640 : pp->reset();
3320 : :
3321 [ - + ]: 640 : if (!do_upstream_section_query)
3322 [ # # # # ]: 0 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
3323 : :
3324 : : // We couldn't find it in the database. Last ditch effort
3325 : : // is to defer to other debuginfo servers.
3326 : :
3327 : 640 : int fd = -1;
3328 [ + - ]: 640 : debuginfod_client *client = debuginfod_pool_begin ();
3329 [ - + ]: 640 : if (client == NULL)
3330 [ # # # # ]: 0 : throw libc_exception(errno, "debuginfod client pool alloc");
3331 : 640 : defer_dtor<debuginfod_client*,void> client_closer (client, debuginfod_pool_end);
3332 : :
3333 [ + - ]: 640 : debuginfod_set_progressfn (client, & debuginfod_find_progress);
3334 : :
3335 [ + + ]: 640 : if (conn)
3336 [ + + ]: 620 : add_client_federation_headers(client, conn);
3337 : :
3338 [ + + ]: 636 : if (artifacttype == "debuginfo")
3339 [ + - ]: 88 : fd = debuginfod_find_debuginfo (client,
3340 [ + - ]: 88 : (const unsigned char*) buildid.c_str(),
3341 : : 0, NULL);
3342 [ + + ]: 548 : else if (artifacttype == "executable")
3343 [ + - ]: 546 : fd = debuginfod_find_executable (client,
3344 [ + - ]: 546 : (const unsigned char*) buildid.c_str(),
3345 : : 0, NULL);
3346 [ + - ]: 2 : else if (artifacttype == "source")
3347 [ + - ]: 2 : fd = debuginfod_find_source (client,
3348 [ + - ]: 2 : (const unsigned char*) buildid.c_str(),
3349 : : 0, suffix.c_str(), NULL);
3350 [ # # ]: 0 : else if (artifacttype == "section")
3351 [ # # ]: 0 : fd = debuginfod_find_section (client,
3352 [ # # ]: 0 : (const unsigned char*) buildid.c_str(),
3353 : : 0, section.c_str(), NULL);
3354 : :
3355 [ + + ]: 636 : if (fd >= 0)
3356 : : {
3357 [ + - ]: 4 : if (conn != 0)
3358 [ + - + - : 644 : inc_metric ("http_responses_total","result","upstream");
+ - + - -
+ - + - -
- - ]
3359 : 4 : struct stat s;
3360 : 4 : int rc = fstat (fd, &s);
3361 [ + - ]: 4 : if (rc == 0)
3362 : : {
3363 [ + - ]: 4 : auto r = MHD_create_response_from_fd ((uint64_t) s.st_size, fd);
3364 [ + - ]: 4 : if (r)
3365 : : {
3366 [ + - ]: 4 : add_mhd_response_header (r, "Content-Type",
3367 : : "application/octet-stream");
3368 : : // Copy the incoming headers
3369 [ + - ]: 4 : const char * hdrs = debuginfod_get_headers(client);
3370 [ + - ]: 4 : string header_dup;
3371 [ + - ]: 4 : if (hdrs)
3372 [ + - - + ]: 4 : header_dup = string(hdrs);
3373 : : // Parse the "header: value\n" lines into (h,v) tuples and pass on
3374 : 20 : while(1)
3375 : : {
3376 : 12 : size_t newline = header_dup.find('\n');
3377 [ + + ]: 12 : if (newline == string::npos) break;
3378 : 8 : size_t colon = header_dup.find(':');
3379 [ + - ]: 8 : if (colon == string::npos) break;
3380 [ + - ]: 8 : string header = header_dup.substr(0,colon);
3381 [ + - ]: 8 : string value = header_dup.substr(colon+1,newline-colon-1);
3382 : : // strip leading spaces from value
3383 : 8 : size_t nonspace = value.find_first_not_of(" ");
3384 [ + - ]: 8 : if (nonspace != string::npos)
3385 [ + - + + ]: 12 : value = value.substr(nonspace);
3386 [ + - ]: 8 : add_mhd_response_header(r, header.c_str(), value.c_str());
3387 [ + - + + : 12 : header_dup = header_dup.substr(newline+1);
+ + - - ]
3388 [ + - ]: 16 : }
3389 : :
3390 [ + - ]: 4 : add_mhd_last_modified (r, s.st_mtime);
3391 [ + - ]: 4 : if (verbose > 1)
3392 [ + - + - : 8 : obatched(clog) << "serving file from upstream debuginfod/cache" << endl;
- - ]
3393 [ + - ]: 4 : if (result_fd)
3394 : 4 : *result_fd = fd;
3395 [ + - ]: 4 : return r; // NB: don't close fd; libmicrohttpd will
3396 : 4 : }
3397 : : }
3398 [ # # ]: 0 : close (fd);
3399 : : }
3400 : : else
3401 [ + + ]: 632 : switch(fd)
3402 : : {
3403 : : case -ENOSYS:
3404 : : break;
3405 : : case -ENOENT:
3406 : : break;
3407 : 532 : default: // some more tricky error
3408 [ + - + - ]: 1064 : throw libc_exception(-fd, "upstream debuginfod query failed");
3409 : : }
3410 : :
3411 [ + - - + ]: 200 : throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found");
3412 [ - + - + ]: 3404 : }
3413 : :
3414 : :
3415 : : ////////////////////////////////////////////////////////////////////////
3416 : :
3417 : : static map<string,double> metrics; // arbitrary data for /metrics query; keyed by metric name plus optional {label="value",...} suffix
3418 : : // NB: values are stored as double (not int64_t); prometheus accepts double
3419 : : static mutex metrics_lock; // taken by the set_metric/inc_metric/add_metric helpers and by handle_metrics around each access to the map
3420 : : // NB: these objects get released during the process exit via global dtors
3421 : : // do not call them from within other global dtors
3422 : :
// Build one prometheus-style label, i.e. the text  NAME="VALUE" .
// Within VALUE, backslash, double-quote and newline are written as
// \\ , \" and \n respectively; every other character is copied
// unchanged.  NAME is emitted verbatim.
// https://prometheus.io/docs/instrumenting/exposition_formats/
static std::string
metric_label(const std::string& name, const std::string& value)
{
  std::string label = name;
  label += "=\"";

  for (const char ch : value)
    {
      if (ch == '\\')
        label += "\\\\";
      else if (ch == '\"')
        label += "\\\"";
      else if (ch == '\n')
        label += "\\n";
      else
        label += ch;
    }

  label += "\"";
  return label;
}
3441 : :
3442 : :
3443 : : // add prometheus-format metric name + label tuple (if any) + value
3444 : :
3445 : : static void
3446 : 1608 : set_metric(const string& metric, double value)
3447 : : {
3448 : 1608 : unique_lock<mutex> lock(metrics_lock);
3449 [ + - ]: 1608 : metrics[metric] = value;
3450 : 1608 : }
3451 : : static void
3452 : 784 : inc_metric(const string& metric)
3453 : : {
3454 : 784 : unique_lock<mutex> lock(metrics_lock);
3455 [ + - ]: 784 : metrics[metric] ++;
3456 : 784 : }
3457 : : static void
3458 : 7052 : set_metric(const string& metric,
3459 : : const string& lname, const string& lvalue,
3460 : : double value)
3461 : : {
3462 [ + - + - : 14102 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
- + - + +
+ - - ]
3463 [ + - ]: 7050 : unique_lock<mutex> lock(metrics_lock);
3464 [ + - ]: 7052 : metrics[key] = value;
3465 [ + - ]: 14104 : }
3466 : :
3467 : : static void
3468 : 376839 : inc_metric(const string& metric,
3469 : : const string& lname, const string& lvalue)
3470 : : {
3471 [ + - + - : 819623 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
- + + + +
+ - - ]
3472 [ + - ]: 376830 : unique_lock<mutex> lock(metrics_lock);
3473 [ + - ]: 376841 : metrics[key] ++;
3474 [ + - ]: 753653 : }
3475 : : static void
3476 : 364565 : add_metric(const string& metric,
3477 : : const string& lname, const string& lvalue,
3478 : : double value)
3479 : : {
3480 [ + - + - : 795046 : string key = (metric + "{" + metric_label(lname, lvalue) + "}");
- + + + +
+ - - ]
3481 [ + - ]: 364530 : unique_lock<mutex> lock(metrics_lock);
3482 [ + - ]: 364584 : metrics[key] += value;
3483 [ + - ]: 729157 : }
3484 : : static void
3485 : 784 : add_metric(const string& metric,
3486 : : double value)
3487 : : {
3488 : 784 : unique_lock<mutex> lock(metrics_lock);
3489 [ + - ]: 784 : metrics[metric] += value;
3490 : 784 : }
3491 : :
3492 : :
3493 : : // and more for higher arity labels if needed
3494 : :
3495 : : static void
3496 : 10041 : inc_metric(const string& metric,
3497 : : const string& lname, const string& lvalue,
3498 : : const string& rname, const string& rvalue)
3499 : : {
3500 [ + - - + : 20082 : string key = (metric + "{"
- - ]
3501 [ + - + - : 40164 : + metric_label(lname, lvalue) + ","
- + - + +
+ - - ]
3502 [ + - - + : 30123 : + metric_label(rname, rvalue) + "}");
- + ]
3503 [ + - ]: 10041 : unique_lock<mutex> lock(metrics_lock);
3504 [ + - ]: 10041 : metrics[key] ++;
3505 [ + - ]: 20082 : }
3506 : : static void
3507 : 10041 : add_metric(const string& metric,
3508 : : const string& lname, const string& lvalue,
3509 : : const string& rname, const string& rvalue,
3510 : : double value)
3511 : : {
3512 [ + - - + : 20082 : string key = (metric + "{"
- - ]
3513 [ + - + - : 40164 : + metric_label(lname, lvalue) + ","
- + - + +
+ - - ]
3514 [ + - - + : 30123 : + metric_label(rname, rvalue) + "}");
- + ]
3515 [ + - ]: 10041 : unique_lock<mutex> lock(metrics_lock);
3516 [ + - ]: 10041 : metrics[key] += value;
3517 [ + - ]: 20082 : }
3518 : :
3519 : : static struct MHD_Response*
3520 : 743 : handle_metrics (off_t* size)
3521 : : {
3522 : 743 : stringstream o;
3523 : 743 : {
3524 [ + - ]: 743 : unique_lock<mutex> lock(metrics_lock);
3525 [ + + ]: 79765 : for (auto&& i : metrics)
3526 [ + - ]: 79022 : o << i.first
3527 : : << " "
3528 [ + - + - ]: 79022 : << std::setprecision(std::numeric_limits<double>::digits10 + 1)
3529 [ + - + - ]: 79022 : << i.second
3530 : 79022 : << endl;
3531 : 0 : }
3532 [ + - ]: 743 : const string& os = o.str();
3533 [ + - ]: 743 : MHD_Response* r = MHD_create_response_from_buffer (os.size(),
3534 [ + - ]: 743 : (void*) os.c_str(),
3535 : : MHD_RESPMEM_MUST_COPY);
3536 [ + - ]: 743 : if (r != NULL)
3537 : : {
3538 [ + - ]: 743 : *size = os.size();
3539 [ + - ]: 743 : add_mhd_response_header (r, "Content-Type", "text/plain");
3540 : : }
3541 [ + - ]: 1486 : return r;
3542 : 743 : }
3543 : :
3544 : :
3545 : : static struct MHD_Response*
3546 : 26 : handle_metadata (MHD_Connection* conn,
3547 : : string key, string value, off_t* size)
3548 : : {
3549 : 26 : MHD_Response* r;
3550 : : // Because this query can take on the order of many seconds, we need
3551 : : // to prevent DoS against the other normal quick queries, so we use
3552 : : // a dedicated database connection.
3553 : 26 : sqlite3 *thisdb = 0;
3554 : 26 : int rc = sqlite3_open_v2 (db_path.c_str(), &thisdb, (SQLITE_OPEN_READONLY
3555 : : |SQLITE_OPEN_URI
3556 : : |SQLITE_OPEN_PRIVATECACHE
3557 : : |SQLITE_OPEN_NOMUTEX), /* private to us */
3558 : : NULL);
3559 [ - + ]: 26 : if (rc)
3560 [ # # # # ]: 0 : throw sqlite_exception(rc, "cannot open database for metadata query");
3561 : 26 : defer_dtor<sqlite3*,int> sqlite_db_closer (thisdb, sqlite3_close_v2);
3562 : :
3563 : : // Query locally for matching e, d files
3564 : 26 : string op;
3565 [ + + ]: 26 : if (key == "glob")
3566 [ + - ]: 22 : op = "glob";
3567 [ + - ]: 4 : else if (key == "file")
3568 [ + - ]: 4 : op = "=";
3569 : : else
3570 [ # # # # ]: 0 : throw reportable_exception("/metadata webapi error, unsupported key");
3571 : :
3572 : : // Since PR30378, the file names are segmented into two tables. We
3573 : : // could do a glob/= search over the _files_v view that combines
3574 : : // them, but that means that the entire _files_v thing has to be
3575 : : // materialized & scanned to do the query. Slow! Instead, we can
3576 : : // segment the incoming file/glob pattern into dirname / basename
3577 : : // parts, and apply them to the corresponding table. This is done
3578 : : // by splitting the value at the last "/". If absent, the same
3579 : : // convention as is used in register_file_name().
3580 : :
3581 : 26 : string dirname, bname; // basename is a "poisoned" identifier on some distros
3582 : 26 : size_t slash = value.rfind('/');
3583 [ - + ]: 26 : if (slash == std::string::npos) {
3584 [ # # ]: 0 : dirname = "";
3585 [ # # ]: 0 : bname = value;
3586 : : } else {
3587 [ + - - + ]: 26 : dirname = value.substr(0, slash);
3588 [ + - - + ]: 26 : bname = value.substr(slash+1);
3589 : : }
3590 : :
3591 : : // NB: further optimization is possible: replacing the 'glob' op
3592 : : // with simple equality, if the corresponding value segment lacks
3593 : : // metacharacters. sqlite may or may not be smart enough to do so,
3594 : : // so we help out.
3595 [ + - - - ]: 26 : string metacharacters = "[]*?";
3596 [ + + + + : 48 : string dop = (op == "glob" && dirname.find_first_of(metacharacters) == string::npos) ? "=" : op;
+ - + - -
- ]
3597 [ + + - + : 48 : string bop = (op == "glob" && bname.find_first_of(metacharacters) == string::npos) ? "=" : op;
- - + - -
- ]
3598 : :
3599 : 26 : string sql = string(
3600 : : // explicit query r_de and f_de once here, rather than the query_d and query_e
3601 : : // separately, because they scan the same tables, so we'd double the work
3602 : : "select d1.executable_p, d1.debuginfo_p, 0 as source_p, "
3603 : : " b1.hex, f1d.name || '/' || f1b.name as file, a1.name as archive "
3604 : : "from " BUILDIDS "_r_de d1, " BUILDIDS "_files f1, " BUILDIDS "_fileparts f1b, " BUILDIDS "_fileparts f1d, "
3605 : : BUILDIDS "_buildids b1, " BUILDIDS "_files_v a1 "
3606 : : "where f1.id = d1.content and a1.id = d1.file and d1.buildid = b1.id "
3607 [ + - + - : 78 : " and f1d.name " + dop + " ? and f1b.name " + bop + " ? and f1.dirname = f1d.id and f1.basename = f1b.id "
- + - + -
+ ]
3608 : : "union all \n"
3609 : : "select d2.executable_p, d2.debuginfo_p, 0, "
3610 : : " b2.hex, f2d.name || '/' || f2b.name, NULL "
3611 : : "from " BUILDIDS "_f_de d2, " BUILDIDS "_files f2, " BUILDIDS "_fileparts f2b, " BUILDIDS "_fileparts f2d, "
3612 : : BUILDIDS "_buildids b2 "
3613 : : "where f2.id = d2.file and d2.buildid = b2.id "
3614 [ + - + - : 78 : " and f2d.name " + dop + " ? and f2b.name " + bop + " ? "
- + - + -
+ - - ]
3615 [ - + ]: 26 : " and f2.dirname = f2d.id and f2.basename = f2b.id");
3616 : :
3617 : : // NB: we could query source file names too, thusly:
3618 : : //
3619 : : // select * from " BUILDIDS "_buildids b, " BUILDIDS "_files_v f1, " BUILDIDS "_r_sref sr
3620 : : // where b.id = sr.buildid and f1.id = sr.artifactsrc and f1.name " + op + "?"
3621 : : // UNION ALL something with BUILDIDS "_f_s"
3622 : : //
3623 : : // But the first part of this query cannot run fast without the same index temp-created
3624 : : // during "maxigroom":
3625 : : // create index " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);
3626 : : // and unfortunately this index is HUGE. It's similar to the size of the _r_sref
3627 : : // table, which is already the largest part of a debuginfod index. Adding that index
3628 : : // would nearly double the .sqlite db size.
3629 : :
3630 [ + - + - : 26 : sqlite_ps *pp = new sqlite_ps (thisdb, "mhd-query-meta-glob", sql);
+ - + - ]
3631 [ + - ]: 26 : pp->reset();
3632 [ + - ]: 26 : pp->bind(1, dirname);
3633 [ + - ]: 26 : pp->bind(2, bname);
3634 [ + - ]: 26 : pp->bind(3, dirname);
3635 [ + - ]: 26 : pp->bind(4, bname);
3636 : 26 : unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
3637 : 26 : pp->reset_timeout(metadata_maxtime_s);
3638 : :
3639 [ + - ]: 26 : json_object *metadata = json_object_new_object();
3640 [ - + - - : 26 : if (!metadata) throw libc_exception(ENOMEM, "json allocation");
- - ]
3641 : 26 : defer_dtor<json_object*,int> metadata_d(metadata, json_object_put);
3642 [ + - ]: 26 : json_object *metadata_arr = json_object_new_array();
3643 [ - + - - : 26 : if (!metadata_arr) throw libc_exception(ENOMEM, "json allocation");
- - ]
3644 [ + - ]: 26 : json_object_object_add(metadata, "results", metadata_arr);
3645 : : // consume all the rows
3646 : :
3647 : 42 : bool metadata_complete = true;
3648 : 42 : while (1)
3649 : : {
3650 [ + - ]: 42 : rc = pp->step_timeout();
3651 [ + + ]: 42 : if (rc == SQLITE_DONE) // success
3652 : : break;
3653 [ + - ]: 16 : if (rc == SQLITE_ABORT || rc == SQLITE_INTERRUPT) // interrupted such as by timeout
3654 : : {
3655 : : metadata_complete = false;
3656 : : break;
3657 : : }
3658 [ - + ]: 16 : if (rc != SQLITE_ROW) // error
3659 [ # # # # ]: 0 : throw sqlite_exception(rc, "step");
3660 : :
3661 [ + - ]: 16 : int m_executable_p = sqlite3_column_int (*pp, 0);
3662 [ + - ]: 16 : int m_debuginfo_p = sqlite3_column_int (*pp, 1);
3663 [ + - ]: 16 : int m_source_p = sqlite3_column_int (*pp, 2);
3664 [ + - - + : 16 : string m_buildid = (const char*) sqlite3_column_text (*pp, 3) ?: ""; // should always be non-null
+ - ]
3665 [ + - - + : 16 : string m_file = (const char*) sqlite3_column_text (*pp, 4) ?: "";
+ - - - ]
3666 [ + - - + : 16 : string m_archive = (const char*) sqlite3_column_text (*pp, 5) ?: "";
+ - - - ]
3667 : :
3668 : : // Confirm that m_file matches in the fnmatch(FNM_PATHNAME)
3669 : : // sense, since sqlite's GLOB operator is a looser filter.
3670 [ + - + - : 16 : if (key == "glob" && fnmatch(value.c_str(), m_file.c_str(), FNM_PATHNAME) != 0)
- + ]
3671 [ # # ]: 0 : continue;
3672 : :
3673 : 48 : auto add_metadata = [metadata_arr, m_buildid, m_file, m_archive](const string& type) {
3674 : 16 : json_object* entry = json_object_new_object();
3675 [ - + - - : 16 : if (NULL == entry) throw libc_exception (ENOMEM, "cannot allocate json");
- - ]
3676 : 16 : defer_dtor<json_object*,int> entry_d(entry, json_object_put);
3677 : :
3678 : 144 : auto add_entry_metadata = [entry](const char* k, string v) {
3679 : 64 : json_object* s;
3680 : 64 : if(v != "") {
3681 : 64 : s = json_object_new_string(v.c_str());
3682 [ - + - - : 64 : if (NULL == s) throw libc_exception (ENOMEM, "cannot allocate json");
- - ]
3683 : 64 : json_object_object_add(entry, k, s);
3684 : : }
3685 : 64 : };
3686 : :
3687 [ + - + - ]: 16 : add_entry_metadata("type", type.c_str());
3688 [ + - + - ]: 16 : add_entry_metadata("buildid", m_buildid);
3689 [ + - + - ]: 16 : add_entry_metadata("file", m_file);
3690 [ + - + - : 32 : if (m_archive != "") add_entry_metadata("archive", m_archive);
+ - ]
3691 [ - + ]: 16 : if (verbose > 3)
3692 [ # # ]: 0 : obatched(clog) << "metadata found local "
3693 : : << json_object_to_json_string_ext(entry,
3694 [ # # # # : 0 : JSON_C_TO_STRING_PRETTY)
# # ]
3695 : 0 : << endl;
3696 : :
3697 : : // Increase ref count to switch its ownership
3698 [ + - + - ]: 16 : json_object_array_add(metadata_arr, json_object_get(entry));
3699 [ + - + - : 32 : };
+ - ]
3700 : :
3701 [ + - + - : 32 : if (m_executable_p) add_metadata("executable");
+ - ]
3702 [ - + - - : 16 : if (m_debuginfo_p) add_metadata("debuginfo");
- - ]
3703 [ - + - - : 16 : if (m_source_p) add_metadata("source");
- - ]
3704 [ - - - - : 64 : }
+ - + - +
- ]
3705 [ + - ]: 26 : pp->reset();
3706 : :
3707 [ + - ]: 26 : unsigned num_local_results = json_object_array_length(metadata_arr);
3708 : :
3709 : : // Query upstream as well
3710 [ + - ]: 26 : debuginfod_client *client = debuginfod_pool_begin();
3711 [ + - ]: 26 : if (client != NULL)
3712 : : {
3713 [ + - ]: 26 : add_client_federation_headers(client, conn);
3714 : :
3715 : 26 : int upstream_metadata_fd;
3716 : 26 : char *upstream_metadata_file = NULL;
3717 [ + - ]: 26 : upstream_metadata_fd = debuginfod_find_metadata(client, key.c_str(), (char*)value.c_str(),
3718 : : &upstream_metadata_file);
3719 [ + + ]: 26 : if (upstream_metadata_fd >= 0) {
3720 : : /* json-c >= 0.13 has json_object_from_fd(). */
3721 [ + - ]: 16 : json_object *upstream_metadata_json = json_object_from_file(upstream_metadata_file);
3722 : 16 : free (upstream_metadata_file);
3723 : 16 : json_object *upstream_metadata_json_arr;
3724 : 16 : json_object *upstream_complete;
3725 [ - + ]: 16 : if (NULL != upstream_metadata_json &&
3726 [ + - + - : 32 : json_object_object_get_ex(upstream_metadata_json, "results", &upstream_metadata_json_arr) &&
- + ]
3727 [ + - ]: 16 : json_object_object_get_ex(upstream_metadata_json, "complete", &upstream_complete))
3728 : : {
3729 [ + - ]: 16 : metadata_complete &= json_object_get_boolean(upstream_complete);
3730 [ + - + + ]: 20 : for (int i = 0, n = json_object_array_length(upstream_metadata_json_arr); i < n; i++)
3731 : : {
3732 [ + - ]: 4 : json_object *entry = json_object_array_get_idx(upstream_metadata_json_arr, i);
3733 [ - + ]: 4 : if (verbose > 3)
3734 [ # # ]: 0 : obatched(clog) << "metadata found remote "
3735 : : << json_object_to_json_string_ext(entry,
3736 [ # # # # : 0 : JSON_C_TO_STRING_PRETTY)
# # ]
3737 : 0 : << endl;
3738 : :
3739 [ + - ]: 4 : json_object_get(entry); // increment reference count
3740 [ + - ]: 4 : json_object_array_add(metadata_arr, entry);
3741 : : }
3742 [ + - ]: 16 : json_object_put(upstream_metadata_json);
3743 : : }
3744 [ + - ]: 16 : close(upstream_metadata_fd);
3745 : : }
3746 [ + - ]: 26 : debuginfod_pool_end (client);
3747 : : }
3748 : :
3749 [ + - ]: 26 : unsigned num_total_results = json_object_array_length(metadata_arr);
3750 : :
3751 [ + - ]: 26 : if (verbose > 2)
3752 [ + - + - ]: 78 : obatched(clog) << "metadata found local=" << num_local_results
3753 [ + - + - ]: 26 : << " remote=" << (num_total_results-num_local_results)
3754 [ + - + - : 26 : << " total=" << num_total_results
+ - ]
3755 : 26 : << endl;
3756 : :
3757 [ + - + - ]: 26 : json_object_object_add(metadata, "complete", json_object_new_boolean(metadata_complete));
3758 [ + - ]: 26 : const char* metadata_str = json_object_to_json_string(metadata);
3759 [ - + ]: 26 : if (!metadata_str)
3760 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate json");
3761 [ + - ]: 26 : r = MHD_create_response_from_buffer (strlen(metadata_str),
3762 : : (void*) metadata_str,
3763 : : MHD_RESPMEM_MUST_COPY);
3764 : 26 : *size = strlen(metadata_str);
3765 [ + - ]: 26 : if (r)
3766 [ + - ]: 26 : add_mhd_response_header(r, "Content-Type", "application/json");
3767 : 26 : return r;
3768 [ + - - + : 52 : }
- + - + -
+ - + -
+ ]
3769 : :
3770 : :
3771 : : static struct MHD_Response*
3772 : 0 : handle_root (off_t* size)
3773 : : {
3774 [ # # # # : 0 : static string version = "debuginfod (" + string (PACKAGE_NAME) + ") "
# # # # #
# # # ]
3775 [ # # # # : 0 : + string (PACKAGE_VERSION);
# # # # #
# ]
3776 : 0 : MHD_Response* r = MHD_create_response_from_buffer (version.size (),
3777 : 0 : (void *) version.c_str (),
3778 : : MHD_RESPMEM_PERSISTENT);
3779 [ # # ]: 0 : if (r != NULL)
3780 : : {
3781 : 0 : *size = version.size ();
3782 : 0 : add_mhd_response_header (r, "Content-Type", "text/plain");
3783 : : }
3784 : 0 : return r;
3785 : : }
3786 : :
3787 : :
3788 : : ////////////////////////////////////////////////////////////////////////
3789 : :
3790 : :
3791 : : /* libmicrohttpd callback */
3792 : : static MHD_RESULT
3793 : 6694 : handler_cb (void * /*cls*/,
3794 : : struct MHD_Connection *connection,
3795 : : const char *url,
3796 : : const char *method,
3797 : : const char * /*version*/,
3798 : : const char * /*upload_data*/,
3799 : : size_t * /*upload_data_size*/,
3800 : : void ** ptr)
3801 : : {
3802 : 6694 : struct MHD_Response *r = NULL;
3803 : 6694 : string url_copy = url;
3804 : :
3805 : : /* libmicrohttpd always makes (at least) two callbacks: once just
3806 : : past the headers, and one after the request body is finished
3807 : : being received. If we process things early (first callback) and
3808 : : queue a response, libmicrohttpd would suppress http keep-alive
3809 : : (via connection->read_closed = true). */
3810 : 6692 : static int aptr; /* just some random object to use as a flag */
3811 [ + + ]: 6692 : if (&aptr != *ptr)
3812 : : {
3813 : : /* do never respond on first call */
3814 : 3346 : *ptr = &aptr;
3815 : 3346 : return MHD_YES;
3816 : : }
3817 : 3346 : *ptr = NULL; /* reset when done */
3818 : :
3819 [ + - ]: 3346 : const char *maxsize_string = MHD_lookup_connection_value(connection, MHD_HEADER_KIND, "X-DEBUGINFOD-MAXSIZE");
3820 : 3347 : long maxsize = 0;
3821 [ + + + - ]: 3347 : if (maxsize_string != NULL && maxsize_string[0] != '\0')
3822 : 2 : maxsize = atol(maxsize_string);
3823 : : else
3824 : : maxsize = 0;
3825 : :
3826 : : #if MHD_VERSION >= 0x00097002
3827 : 3347 : enum MHD_Result rc;
3828 : : #else
3829 : : int rc = MHD_NO; // mhd
3830 : : #endif
3831 : 3347 : int http_code = 500;
3832 : 3347 : off_t http_size = -1;
3833 : 3347 : struct timespec ts_start, ts_end;
3834 : 3347 : clock_gettime (CLOCK_MONOTONIC, &ts_start);
3835 : 3347 : double afteryou = 0.0;
3836 [ + - ]: 3347 : string artifacttype, suffix;
3837 : 3347 : string urlargs; // for logging
3838 : :
3839 : 3347 : try
3840 : : {
3841 [ + - - + : 7328 : if (string(method) != "GET")
- + ]
3842 [ # # # # ]: 0 : throw reportable_exception(400, "we support GET only");
3843 : :
3844 : : /* Start decoding the URL. */
3845 : 3347 : size_t slash1 = url_copy.find('/', 1);
3846 [ + - ]: 3347 : string url1 = url_copy.substr(0, slash1); // ok even if slash1 not found
3847 : :
3848 [ + + + - ]: 5919 : if (slash1 != string::npos && url1 == "/buildid")
3849 : : {
3850 : : // PR27863: block this thread awhile if another thread is already busy
3851 : : // fetching the exact same thing. This is better for Everyone.
3852 : : // The latecomer says "... after you!" and waits.
3853 [ + - + - : 5770 : add_metric ("thread_busy", "role", "http-buildid-after-you", 1);
+ - + - -
+ + - - -
- - ]
3854 : : #ifdef HAVE_PTHREAD_SETNAME_NP
3855 : 2572 : (void) pthread_setname_np (pthread_self(), "mhd-buildid-after-you");
3856 : : #endif
3857 : 2572 : struct timespec tsay_start, tsay_end;
3858 : 2572 : clock_gettime (CLOCK_MONOTONIC, &tsay_start);
3859 [ + + + - ]: 2572 : static unique_set<string> busy_urls;
3860 [ + - ]: 2572 : unique_set_reserver<string> after_you(busy_urls, url_copy);
3861 : 2572 : clock_gettime (CLOCK_MONOTONIC, &tsay_end);
3862 : 2572 : afteryou = (tsay_end.tv_sec - tsay_start.tv_sec) + (tsay_end.tv_nsec - tsay_start.tv_nsec)/1.e9;
3863 [ + - + - : 5144 : add_metric ("thread_busy", "role", "http-buildid-after-you", -1);
+ - + - -
+ + - - -
- - ]
3864 : :
3865 [ + - + - : 5144 : tmp_inc_metric m ("thread_busy", "role", "http-buildid");
+ - + - -
+ - + - -
- - ]
3866 : : #ifdef HAVE_PTHREAD_SETNAME_NP
3867 : 2572 : (void) pthread_setname_np (pthread_self(), "mhd-buildid");
3868 : : #endif
3869 : 2572 : size_t slash2 = url_copy.find('/', slash1+1);
3870 [ - + ]: 2572 : if (slash2 == string::npos)
3871 [ # # # # ]: 0 : throw reportable_exception("/buildid/ webapi error, need buildid");
3872 : :
3873 [ + - ]: 2572 : string buildid = url_copy.substr(slash1+1, slash2-slash1-1);
3874 : :
3875 : 2572 : size_t slash3 = url_copy.find('/', slash2+1);
3876 : :
3877 [ + + ]: 2572 : if (slash3 == string::npos)
3878 : : {
3879 [ + - - + ]: 1462 : artifacttype = url_copy.substr(slash2+1);
3880 [ + - ]: 1462 : suffix = "";
3881 : : }
3882 : : else
3883 : : {
3884 [ + - - + ]: 1110 : artifacttype = url_copy.substr(slash2+1, slash3-slash2-1);
3885 [ + - - + : 1736 : suffix = url_copy.substr(slash3); // include the slash in the suffix
+ + ]
3886 : : }
3887 : :
3888 : : // get the resulting fd so we can report its size
3889 : 2572 : int fd;
3890 [ + + ]: 2572 : r = handle_buildid(connection, buildid, artifacttype, suffix, &fd);
3891 [ + - ]: 1946 : if (r)
3892 : : {
3893 : 1946 : struct stat fs;
3894 [ + - ]: 1946 : if (fstat(fd, &fs) == 0)
3895 : 1946 : http_size = fs.st_size;
3896 : : // libmicrohttpd will close (fd);
3897 : : }
3898 : 3198 : }
3899 [ + + ]: 775 : else if (url1 == "/metrics")
3900 : : {
3901 [ + - + - : 1486 : tmp_inc_metric m ("thread_busy", "role", "http-metrics");
+ - + - -
+ - + - -
- - ]
3902 [ + - ]: 743 : artifacttype = "metrics";
3903 [ + - + - : 1486 : inc_metric("http_requests_total", "type", artifacttype);
+ - - + -
- ]
3904 [ + - ]: 743 : r = handle_metrics(& http_size);
3905 : 743 : }
3906 [ + + ]: 32 : else if (url1 == "/metadata")
3907 : : {
3908 [ + - + - : 52 : tmp_inc_metric m ("thread_busy", "role", "http-metadata");
+ - + - -
+ - + - -
- - ]
3909 [ + - ]: 26 : const char* key = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "key");
3910 [ + - ]: 26 : const char* value = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "value");
3911 [ - + ]: 26 : if (NULL == value || NULL == key)
3912 [ # # # # ]: 0 : throw reportable_exception("/metadata webapi error, need key and value");
3913 : :
3914 [ + - + - : 26 : urlargs = string("?key=") + string(key) + string("&value=") + string(value); // apprx., for logging
+ - + - +
- + - + -
- + - + -
+ - + - +
- + + + -
- - - -
- ]
3915 [ + - ]: 26 : artifacttype = "metadata";
3916 [ + - + - : 52 : inc_metric("http_requests_total", "type", artifacttype);
+ - - + -
- ]
3917 [ + - + - : 26 : r = handle_metadata(connection, key, value, &http_size);
+ - - + +
+ - - ]
3918 : 26 : }
3919 [ - + ]: 6 : else if (url1 == "/")
3920 : : {
3921 [ # # ]: 0 : artifacttype = "/";
3922 [ - - - - : 634 : inc_metric("http_requests_total", "type", artifacttype);
- - - - -
- - + ]
3923 [ # # ]: 0 : r = handle_root(& http_size);
3924 : : }
3925 : : else
3926 [ + - + - : 18 : throw reportable_exception("webapi error, unrecognized '" + url1 + "'");
+ - - + ]
3927 : :
3928 [ - + ]: 2715 : if (r == 0)
3929 [ # # # # ]: 0 : throw reportable_exception("internal error, missing response");
3930 : :
3931 [ + + + - ]: 2715 : if (maxsize > 0 && http_size > maxsize)
3932 : : {
3933 [ + - ]: 2 : MHD_destroy_response(r);
3934 [ + - + - : 6 : throw reportable_exception(406, "File too large, max size=" + std::to_string(maxsize));
+ - - + ]
3935 : : }
3936 : :
3937 [ + - ]: 2713 : rc = MHD_queue_response (connection, MHD_HTTP_OK, r);
3938 : 2713 : http_code = MHD_HTTP_OK;
3939 [ + - ]: 2713 : MHD_destroy_response (r);
3940 : 3347 : }
3941 [ - + ]: 634 : catch (const reportable_exception& e)
3942 : : {
3943 [ + - + - : 1268 : inc_metric("http_responses_total","result","error");
+ - + - -
+ - + - -
- - ]
3944 [ + - ]: 634 : e.report(clog);
3945 : 634 : http_code = e.code;
3946 [ + - ]: 634 : http_size = e.message.size();
3947 [ + - ]: 634 : rc = e.mhd_send_response (connection);
3948 : 634 : }
3949 : :
3950 : 3347 : clock_gettime (CLOCK_MONOTONIC, &ts_end);
3951 : 3347 : double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
3952 : : // afteryou: delay waiting for other client's identical query to complete
3953 : : // deltas: total latency, including afteryou waiting
3954 [ + - + - : 6694 : obatched(clog) << conninfo(connection)
- - ]
3955 : : << ' ' << method << ' ' << url << urlargs
3956 [ + - + - : 3347 : << ' ' << http_code << ' ' << http_size
+ - + - +
- + - + -
+ - ]
3957 [ + - + - : 3347 : << ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
+ - + - +
- + - +
- ]
3958 [ + - ]: 3347 : << endl;
3959 : :
3960 : : // related prometheus metrics
3961 : 3347 : string http_code_str = to_string(http_code);
3962 [ + - + - : 6694 : add_metric("http_responses_transfer_bytes_sum",
+ - + - -
+ - + - -
- - ]
3963 : : "code", http_code_str, "type", artifacttype, http_size);
3964 [ + - + - : 6694 : inc_metric("http_responses_transfer_bytes_count",
+ - + - -
+ - + - -
- - ]
3965 : : "code", http_code_str, "type", artifacttype);
3966 : :
3967 [ + - + - : 6694 : add_metric("http_responses_duration_milliseconds_sum",
+ - + - -
+ - + - -
- - ]
3968 : : "code", http_code_str, "type", artifacttype, deltas*1000); // prometheus prefers _seconds and floating point
3969 [ + - + - : 6694 : inc_metric("http_responses_duration_milliseconds_count",
+ - + - -
+ - + - -
- - ]
3970 : : "code", http_code_str, "type", artifacttype);
3971 : :
3972 [ + - + - : 6694 : add_metric("http_responses_after_you_milliseconds_sum",
+ - + - -
+ - + - -
- - ]
3973 : : "code", http_code_str, "type", artifacttype, afteryou*1000);
3974 [ + - + - : 6694 : inc_metric("http_responses_after_you_milliseconds_count",
+ - + - -
+ - + - -
- - - - ]
3975 : : "code", http_code_str, "type", artifacttype);
3976 : :
3977 [ - + ]: 3347 : return rc;
3978 [ + + + + : 16311 : }
- + + + ]
3979 : :
3980 : :
3981 : : ////////////////////////////////////////////////////////////////////////
3982 : : // borrowed originally from src/nm.c get_local_names()
3983 : :
3984 : : static void
3985 : 412 : dwarf_extract_source_paths (Elf *elf, set<string>& debug_sourcefiles)
3986 : : noexcept // no exceptions - so we can simplify the altdbg resource release at end
3987 : : {
3988 : 412 : Dwarf* dbg = dwarf_begin_elf (elf, DWARF_C_READ, NULL);
3989 [ - + ]: 412 : if (dbg == NULL)
3990 : 0 : return;
3991 : :
3992 : 412 : Dwarf* altdbg = NULL;
3993 : 412 : int altdbg_fd = -1;
3994 : :
3995 : : // DWZ handling: if we have an unsatisfied debug-alt-link, add an
3996 : : // empty string into the outgoing sourcefiles set, so the caller
3997 : : // should know that our data is incomplete.
3998 : 412 : const char *alt_name_p;
3999 : 412 : const void *alt_build_id; // elfutils-owned memory
4000 : 412 : ssize_t sz = dwelf_dwarf_gnu_debugaltlink (dbg, &alt_name_p, &alt_build_id);
4001 [ + + ]: 412 : if (sz > 0) // got one!
4002 : : {
4003 : 200 : string buildid;
4004 : 200 : unsigned char* build_id_bytes = (unsigned char*) alt_build_id;
4005 [ + + ]: 4200 : for (ssize_t idx=0; idx<sz; idx++)
4006 : : {
4007 : 4000 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
4008 : 4000 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
4009 : : }
4010 : :
4011 [ + + ]: 200 : if (verbose > 3)
4012 : 156 : obatched(clog) << "Need altdebug buildid=" << buildid << endl;
4013 : :
4014 : : // but is it unsatisfied the normal elfutils ways?
4015 : 200 : Dwarf* alt = dwarf_getalt (dbg);
4016 [ + - ]: 200 : if (alt == NULL)
4017 : : {
4018 : : // Yup, unsatisfied the normal way. Maybe we can satisfy it
4019 : : // from our own debuginfod database.
4020 : 200 : int alt_fd;
4021 : 200 : struct MHD_Response *r = 0;
4022 : 200 : try
4023 : : {
4024 [ + - ]: 200 : string artifacttype = "debuginfo";
4025 [ + - + + : 220 : r = handle_buildid (0, buildid, artifacttype, "", &alt_fd);
- + - + -
+ ]
4026 : 20 : }
4027 [ - + ]: 20 : catch (const reportable_exception& e)
4028 : : {
4029 : : // swallow exceptions
4030 : 20 : }
4031 : :
4032 : : // NB: this is not actually recursive! This invokes the web-query
4033 : : // path, which cannot get back into the scan code paths.
4034 [ + - ]: 180 : if (r)
4035 : : {
4036 : : // Found it!
4037 : 180 : altdbg_fd = dup(alt_fd); // ok if this fails, downstream failures ok
4038 : 180 : alt = altdbg = dwarf_begin (altdbg_fd, DWARF_C_READ);
4039 : : // NB: must close this dwarf and this fd at the bottom of the function!
4040 : 180 : MHD_destroy_response (r); // will close alt_fd
4041 [ + - ]: 180 : if (alt)
4042 : 180 : dwarf_setalt (dbg, alt);
4043 : : }
4044 : : }
4045 : : else
4046 : : {
4047 : : // NB: dwarf_setalt(alt) inappropriate - already done!
4048 : : // NB: altdbg will stay 0 so nothing tries to redundantly dealloc.
4049 : : }
4050 : :
4051 [ + + ]: 200 : if (alt)
4052 : : {
4053 [ + + ]: 180 : if (verbose > 3)
4054 : 156 : obatched(clog) << "Resolved altdebug buildid=" << buildid << endl;
4055 : : }
4056 : : else // (alt == NULL) - signal possible presence of poor debuginfo
4057 : : {
4058 [ - + ]: 20 : debug_sourcefiles.insert("");
4059 [ - + ]: 20 : if (verbose > 3)
4060 : 0 : obatched(clog) << "Unresolved altdebug buildid=" << buildid << endl;
4061 : : }
4062 : 200 : }
4063 : :
4064 : 412 : Dwarf_Off offset = 0;
4065 : 412 : Dwarf_Off old_offset;
4066 : 412 : size_t hsize;
4067 : :
4068 [ + + ]: 9260 : while (dwarf_nextcu (dbg, old_offset = offset, &offset, &hsize, NULL, NULL, NULL) == 0)
4069 : : {
4070 : 8848 : Dwarf_Die cudie_mem;
4071 : 8848 : Dwarf_Die *cudie = dwarf_offdie (dbg, old_offset + hsize, &cudie_mem);
4072 : :
4073 [ - + ]: 8848 : if (cudie == NULL)
4074 : 36 : continue;
4075 [ + + ]: 8848 : if (dwarf_tag (cudie) != DW_TAG_compile_unit)
4076 : 36 : continue;
4077 : :
4078 [ - + ]: 8812 : const char *cuname = dwarf_diename(cudie) ?: "unknown";
4079 : :
4080 : 8812 : Dwarf_Files *files;
4081 : 8812 : size_t nfiles;
4082 [ - + ]: 8812 : if (dwarf_getsrcfiles (cudie, &files, &nfiles) != 0)
4083 : 0 : continue;
4084 : :
4085 : : // extract DW_AT_comp_dir to resolve relative file names
4086 : 8812 : const char *comp_dir = "";
4087 : 8812 : const char *const *dirs;
4088 : 8812 : size_t ndirs;
4089 [ - + ]: 8812 : if (dwarf_getsrcdirs (files, &dirs, &ndirs) == 0 &&
4090 [ - + ]: 8812 : dirs[0] != NULL)
4091 : : comp_dir = dirs[0];
4092 : : if (comp_dir == NULL)
4093 : : comp_dir = "";
4094 : :
4095 [ + + ]: 8812 : if (verbose > 3)
4096 : 13556 : obatched(clog) << "searching for sources for cu=" << cuname << " comp_dir=" << comp_dir
4097 : 6774 : << " #files=" << nfiles << " #dirs=" << ndirs << endl;
4098 : :
4099 [ + - - - ]: 8812 : if (comp_dir[0] == '\0' && cuname[0] != '/')
4100 : : {
4101 [ # # ]: 0 : if (verbose > 3)
4102 : 0 : obatched(clog) << "skipping cu=" << cuname << " due to empty comp_dir" << endl;
4103 : 0 : continue;
4104 : : }
4105 : :
4106 [ + + ]: 143472 : for (size_t f = 1; f < nfiles; f++)
4107 : : {
4108 : 134660 : const char *hat = dwarf_filesrc (files, f, NULL, NULL);
4109 [ - + ]: 134691 : if (hat == NULL)
4110 : 0 : continue;
4111 : :
4112 [ + + ]: 134680 : if (string(hat) == "<built-in>"
4113 [ + + + + : 404013 : || string_endswith(hat, "<built-in>")) // gcc intrinsics, don't bother record
+ # + + ]
4114 : 1686 : continue;
4115 : :
4116 [ + + ]: 132994 : string waldo;
4117 [ + + ]: 132994 : if (hat[0] == '/') // absolute
4118 [ - + ]: 81708 : waldo = (string (hat));
4119 [ + - ]: 51286 : else if (comp_dir[0] != '\0') // comp_dir relative
4120 [ - + - + : 89561 : waldo = (string (comp_dir) + string("/") + string (hat));
- + - + +
+ ]
4121 : : else
4122 : : {
4123 [ # # ]: 0 : if (verbose > 3)
4124 : 0 : obatched(clog) << "skipping hat=" << hat << " due to empty comp_dir" << endl;
4125 [ # # ]: 0 : continue;
4126 : : }
4127 : :
4128 : : // NB: this is the 'waldo' that a dbginfo client will have
4129 : : // to supply for us to give them the file The comp_dir
4130 : : // prefixing is a definite complication. Otherwise we'd
4131 : : // have to return a setof comp_dirs (one per CU!) with
4132 : : // corresponding filesrc[] names, instead of one absolute
4133 : : // resoved set. Maybe we'll have to do that anyway. XXX
4134 : :
4135 [ + + ]: 132999 : if (verbose > 4)
4136 [ - + ]: 32 : obatched(clog) << waldo
4137 [ - + ]: 16 : << (debug_sourcefiles.find(waldo)==debug_sourcefiles.end() ? " new" : " dup") << endl;
4138 : :
4139 [ + - ]: 132999 : debug_sourcefiles.insert (waldo);
4140 : 134660 : }
4141 : : }
4142 : :
4143 : 412 : dwarf_end(dbg);
4144 [ + + ]: 412 : if (altdbg)
4145 : 180 : dwarf_end(altdbg);
4146 [ + + ]: 412 : if (altdbg_fd >= 0)
4147 : 180 : close(altdbg_fd);
4148 : : }
4149 : :
4150 : :
4151 : :
4152 : : static void
4153 : 1700 : elf_classify (int fd, bool &executable_p, bool &debuginfo_p, string &buildid, set<string>& debug_sourcefiles)
4154 : : {
4155 : 1700 : Elf *elf = elf_begin (fd, ELF_C_READ_MMAP_PRIVATE, NULL);
4156 [ + - ]: 1700 : if (elf == NULL)
4157 : : return;
4158 : :
4159 : 1700 : try // catch our types of errors and clean up the Elf* object
4160 : : {
4161 [ + - + + ]: 1700 : if (elf_kind (elf) != ELF_K_ELF)
4162 : : {
4163 [ + - ]: 898 : elf_end (elf);
4164 : 938 : return;
4165 : : }
4166 : :
4167 : 802 : GElf_Ehdr ehdr_storage;
4168 [ + - ]: 802 : GElf_Ehdr *ehdr = gelf_getehdr (elf, &ehdr_storage);
4169 [ - + ]: 802 : if (ehdr == NULL)
4170 : : {
4171 [ # # ]: 0 : elf_end (elf);
4172 : : return;
4173 : : }
4174 : 802 : auto elf_type = ehdr->e_type;
4175 : :
4176 : 802 : const void *build_id; // elfutils-owned memory
4177 [ + - ]: 802 : ssize_t sz = dwelf_elf_gnu_build_id (elf, & build_id);
4178 [ + + ]: 802 : if (sz <= 0)
4179 : : {
4180 : : // It's not a diagnostic-worthy error for an elf file to lack build-id.
4181 : : // It might just be very old.
4182 [ + - ]: 40 : elf_end (elf);
4183 : : return;
4184 : : }
4185 : :
4186 : : // build_id is a raw byte array; convert to hexadecimal *lowercase*
4187 : 762 : unsigned char* build_id_bytes = (unsigned char*) build_id;
4188 [ + + ]: 15994 : for (ssize_t idx=0; idx<sz; idx++)
4189 : : {
4190 [ + - ]: 15233 : buildid += "0123456789abcdef"[build_id_bytes[idx] >> 4];
4191 [ + - ]: 30469 : buildid += "0123456789abcdef"[build_id_bytes[idx] & 0xf];
4192 : : }
4193 : :
4194 : : // now decide whether it's an executable - namely, any allocatable section has
4195 : : // PROGBITS;
4196 [ + + ]: 761 : if (elf_type == ET_EXEC || elf_type == ET_DYN)
4197 : : {
4198 : 700 : size_t shnum;
4199 [ + - ]: 700 : int rc = elf_getshdrnum (elf, &shnum);
4200 [ - + ]: 700 : if (rc < 0)
4201 [ # # # # ]: 0 : throw elfutils_exception(rc, "getshdrnum");
4202 : :
4203 : 700 : executable_p = false;
4204 [ + + ]: 13196 : for (size_t sc = 0; sc < shnum; sc++)
4205 : : {
4206 [ + - ]: 12870 : Elf_Scn *scn = elf_getscn (elf, sc);
4207 [ - + ]: 12869 : if (scn == NULL)
4208 : 0 : continue;
4209 : :
4210 : 12869 : GElf_Shdr shdr_mem;
4211 [ + - ]: 12869 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
4212 [ - + ]: 12870 : if (shdr == NULL)
4213 : 0 : continue;
4214 : :
4215 : : // allocated (loadable / vm-addr-assigned) section with available content?
4216 [ + + + + ]: 12870 : if ((shdr->sh_type == SHT_PROGBITS) && (shdr->sh_flags & SHF_ALLOC))
4217 : : {
4218 [ - + ]: 374 : if (verbose > 4)
4219 [ # # # # : 0 : obatched(clog) << "executable due to SHF_ALLOC SHT_PROGBITS sc=" << sc << endl;
# # ]
4220 : 374 : executable_p = true;
4221 : 374 : break; // no need to keep looking for others
4222 : : }
4223 : : } // iterate over sections
4224 : : } // executable_p classification
4225 : :
4226 : : // now decide whether it's a debuginfo - namely, if it has any .debug* or .zdebug* sections
4227 : : // logic mostly stolen from fweimer@redhat.com's elfclassify drafts
4228 : 761 : size_t shstrndx;
4229 [ + - ]: 761 : int rc = elf_getshdrstrndx (elf, &shstrndx);
4230 [ - + ]: 762 : if (rc < 0)
4231 [ # # # # ]: 0 : throw elfutils_exception(rc, "getshdrstrndx");
4232 : :
4233 : : Elf_Scn *scn = NULL;
4234 : : bool symtab_p = false;
4235 : : bool bits_alloc_p = false;
4236 : 40856 : while (true)
4237 : : {
4238 [ + - ]: 20809 : scn = elf_nextscn (elf, scn);
4239 [ + + ]: 20771 : if (scn == NULL)
4240 : : break;
4241 : 20421 : GElf_Shdr shdr_storage;
4242 [ + - ]: 20421 : GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_storage);
4243 [ - + ]: 20421 : if (shdr == NULL)
4244 : : break;
4245 [ + - ]: 20421 : const char *section_name = elf_strptr (elf, shstrndx, shdr->sh_name);
4246 [ - + ]: 20459 : if (section_name == NULL)
4247 : : break;
4248 [ + + ]: 20459 : if (startswith (section_name, ".debug_line") ||
4249 [ - + ]: 20047 : startswith (section_name, ".zdebug_line"))
4250 : : {
4251 : 412 : debuginfo_p = true;
4252 [ + - ]: 412 : if (scan_source_info)
4253 : 412 : dwarf_extract_source_paths (elf, debug_sourcefiles);
4254 : : break; // expecting only one .*debug_line, so no need to look for others
4255 : : }
4256 [ + + ]: 20047 : else if (startswith (section_name, ".debug_") ||
4257 [ + # ]: 18761 : startswith (section_name, ".zdebug_"))
4258 : : {
4259 : 1250 : debuginfo_p = true;
4260 : : // NB: don't break; need to parse .debug_line for sources
4261 : : }
4262 [ + + ]: 18797 : else if (shdr->sh_type == SHT_SYMTAB)
4263 : : {
4264 : : symtab_p = true;
4265 : : }
4266 : 18773 : else if (shdr->sh_type != SHT_NOBITS
4267 [ + + ]: 18773 : && shdr->sh_type != SHT_NOTE
4268 [ + + ]: 9165 : && (shdr->sh_flags & SHF_ALLOC) != 0)
4269 : : {
4270 : 20047 : bits_alloc_p = true;
4271 : : }
4272 : 20047 : }
4273 : :
4274 : : // For more expansive elf/split-debuginfo classification, we
4275 : : // want to identify as debuginfo "strip -s"-produced files
4276 : : // without .debug_info* (like libicudata), but we don't want to
4277 : : // identify "strip -g" executables (with .symtab left there).
4278 [ - + ]: 762 : if (symtab_p && !bits_alloc_p)
4279 : 0 : debuginfo_p = true;
4280 : : }
4281 [ # # ]: 0 : catch (const reportable_exception& e)
4282 : : {
4283 [ # # ]: 0 : e.report(clog);
4284 : 0 : }
4285 : 762 : elf_end (elf);
4286 : : }
4287 : :
4288 : :
4289 : : // Intern the given file name in two parts (dirname & basename) and
4290 : : // return the resulting file's id.
4291 : : static int64_t
4292 : 32726 : register_file_name(sqlite_ps& ps_upsert_fileparts,
4293 : : sqlite_ps& ps_upsert_file,
4294 : : sqlite_ps& ps_lookup_file,
4295 : : const string& name)
4296 : : {
4297 : 32726 : std::size_t slash = name.rfind('/');
4298 [ + + ]: 32726 : string dirname, filename;
4299 [ + + ]: 32726 : if (slash == std::string::npos)
4300 : : {
4301 [ + - ]: 90 : dirname = "";
4302 [ + - ]: 90 : filename = name;
4303 : : }
4304 : : else
4305 : : {
4306 [ + - - + ]: 32636 : dirname = name.substr(0, slash);
4307 [ + - - + : 32634 : filename = name.substr(slash+1);
- - ]
4308 : : }
4309 : : // NB: see also handle_metadata()
4310 : :
4311 : : // intern the two substrings
4312 : 32725 : ps_upsert_fileparts
4313 [ + - ]: 32725 : .reset()
4314 [ + - ]: 32726 : .bind(1, dirname)
4315 [ + - ]: 32723 : .step_ok_done();
4316 : 32726 : ps_upsert_fileparts
4317 [ + - ]: 32726 : .reset()
4318 [ + - ]: 32726 : .bind(1, filename)
4319 [ + - ]: 32725 : .step_ok_done();
4320 : :
4321 : : // intern the tuple
4322 : 32726 : ps_upsert_file
4323 [ + - ]: 32726 : .reset()
4324 [ + - ]: 32726 : .bind(1, dirname)
4325 [ + - ]: 32724 : .bind(2, filename)
4326 [ + - ]: 32724 : .step_ok_done();
4327 : :
4328 : : // look up the tuple's id
4329 : 32726 : ps_lookup_file
4330 [ + - ]: 32726 : .reset()
4331 [ + - ]: 32726 : .bind(1, dirname)
4332 [ + - ]: 32726 : .bind(2, filename);
4333 [ + - ]: 32726 : int rc = ps_lookup_file.step();
4334 [ - + - - : 32726 : if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step");
- - ]
4335 : :
4336 [ + - ]: 32726 : int64_t id = sqlite3_column_int64 (ps_lookup_file, 0);
4337 [ + - ]: 32725 : ps_lookup_file.reset();
4338 [ + + ]: 32726 : return id;
4339 [ + + ]: 63128 : }
4340 : :
4341 : :
4342 : :
4343 : : static void
4344 : 1098 : scan_source_file (const string& rps, const stat_t& st,
4345 : : sqlite_ps& ps_upsert_buildids,
4346 : : sqlite_ps& ps_upsert_fileparts,
4347 : : sqlite_ps& ps_upsert_file,
4348 : : sqlite_ps& ps_lookup_file,
4349 : : sqlite_ps& ps_upsert_de,
4350 : : sqlite_ps& ps_upsert_s,
4351 : : sqlite_ps& ps_query,
4352 : : sqlite_ps& ps_scan_done,
4353 : : unsigned& fts_cached,
4354 : : unsigned& fts_executable,
4355 : : unsigned& fts_debuginfo,
4356 : : unsigned& fts_sourcefiles)
4357 : : {
4358 : 1098 : int64_t fileid = register_file_name(ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, rps);
4359 : :
4360 : : /* See if we know of it already. */
4361 : 1098 : int rc = ps_query
4362 : 1098 : .reset()
4363 : 1098 : .bind(1, fileid)
4364 : 1098 : .bind(2, st.st_mtime)
4365 : 1098 : .step();
4366 : 1098 : ps_query.reset();
4367 [ + + ]: 1098 : if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
4368 : : // no need to recheck a file/version we already know
4369 : : // specifically, no need to elf-begin a file we already determined is non-elf
4370 : : // (so is stored with buildid=NULL)
4371 : : {
4372 : 438 : fts_cached++;
4373 : 438 : return;
4374 : : }
4375 : :
4376 : 660 : bool executable_p = false, debuginfo_p = false; // E and/or D
4377 [ + - ]: 660 : string buildid;
4378 [ + - ]: 660 : set<string> sourcefiles;
4379 : :
4380 [ + - ]: 660 : int fd = open (rps.c_str(), O_RDONLY);
4381 : 660 : try
4382 : : {
4383 [ + - ]: 660 : if (fd >= 0)
4384 [ + - ]: 660 : elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
4385 : : else
4386 [ # # # # : 0 : throw libc_exception(errno, string("open ") + rps);
# # # # ]
4387 [ + - + - : 1320 : add_metric ("scanned_bytes_total","source","file",
+ - - + -
+ - - -
- ]
4388 [ + - ]: 660 : st.st_size);
4389 [ + - + - : 1320 : inc_metric ("scanned_files_total","source","file");
+ - + - -
+ - + - -
- - ]
4390 : : }
4391 : : // NB: we catch exceptions here too, so that we can
4392 : : // cache the corrupt-elf case (!executable_p &&
4393 : : // !debuginfo_p) just below, just as if we had an
4394 : : // EPERM error from open(2).
4395 [ - - ]: 0 : catch (const reportable_exception& e)
4396 : : {
4397 [ - - ]: 0 : e.report(clog);
4398 : 0 : }
4399 : :
4400 [ + - ]: 660 : if (fd >= 0)
4401 [ + - ]: 660 : close (fd);
4402 : :
4403 [ + + ]: 660 : if (buildid == "")
4404 : : {
4405 : : // no point storing an elf file without buildid
4406 : 564 : executable_p = false;
4407 : 564 : debuginfo_p = false;
4408 : : }
4409 : : else
4410 : : {
4411 : : // register this build-id in the interning table
4412 : 96 : ps_upsert_buildids
4413 [ + - ]: 96 : .reset()
4414 [ + - ]: 96 : .bind(1, buildid)
4415 [ + - ]: 96 : .step_ok_done();
4416 : : }
4417 : :
4418 [ + + ]: 660 : if (executable_p)
4419 : 72 : fts_executable ++;
4420 [ + + ]: 660 : if (debuginfo_p)
4421 : 72 : fts_debuginfo ++;
4422 [ + + + + ]: 660 : if (executable_p || debuginfo_p)
4423 : : {
4424 : 96 : ps_upsert_de
4425 [ + - ]: 96 : .reset()
4426 [ + - ]: 96 : .bind(1, buildid)
4427 [ + + + - ]: 120 : .bind(2, debuginfo_p ? 1 : 0)
4428 [ + + + - ]: 120 : .bind(3, executable_p ? 1 : 0)
4429 [ + - ]: 96 : .bind(4, fileid)
4430 [ + - ]: 96 : .bind(5, st.st_mtime)
4431 [ + - ]: 96 : .step_ok_done();
4432 : : }
4433 [ + + ]: 660 : if (executable_p)
4434 [ + - + - : 144 : inc_metric("found_executable_total","source","files");
+ - + - -
+ - + - -
- - ]
4435 [ + + ]: 660 : if (debuginfo_p)
4436 [ + - + - : 144 : inc_metric("found_debuginfo_total","source","files");
+ - + - -
+ - + - -
- - ]
4437 : :
4438 [ + + + - ]: 732 : if (sourcefiles.size() && buildid != "")
4439 : : {
4440 : 72 : fts_sourcefiles += sourcefiles.size();
4441 : :
4442 [ + + ]: 14746 : for (auto&& dwarfsrc : sourcefiles)
4443 : : {
4444 [ + - ]: 14674 : char *srp = realpath(dwarfsrc.c_str(), NULL);
4445 [ - + ]: 14674 : if (srp == NULL) // also if DWZ unresolved dwarfsrc=""
4446 : 0 : continue; // unresolvable files are not a serious problem
4447 : : // throw libc_exception(errno, "fts/file realpath " + srcpath);
4448 [ + - ]: 14674 : string srps = string(srp);
4449 : 14674 : free (srp);
4450 : :
4451 : 14674 : struct stat sfs;
4452 : 14674 : rc = stat(srps.c_str(), &sfs);
4453 [ - + ]: 14674 : if (rc != 0)
4454 [ # # ]: 0 : continue;
4455 : :
4456 [ + - ]: 14674 : if (verbose > 2)
4457 [ + - + - : 44022 : obatched(clog) << "recorded buildid=" << buildid << " file=" << srps
- - ]
4458 [ + - + - : 14674 : << " mtime=" << sfs.st_mtime
+ - + - ]
4459 [ + - + - : 14674 : << " as source " << dwarfsrc << endl;
+ - ]
4460 : :
4461 : : // PR25548: store canonicalized dwarfsrc path
4462 [ + - ]: 14674 : string dwarfsrc_canon = canon_pathname (dwarfsrc);
4463 [ + + ]: 14674 : if (dwarfsrc_canon != dwarfsrc)
4464 : : {
4465 [ + + ]: 2636 : if (verbose > 3)
4466 [ + - + - : 4168 : obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+ - + - +
- ]
4467 : : }
4468 : :
4469 [ + - ]: 14674 : int64_t fileid1 = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, dwarfsrc_canon);
4470 [ + - ]: 14674 : int64_t fileid2 = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, srps);
4471 : :
4472 : 14674 : ps_upsert_s
4473 [ + - ]: 14674 : .reset()
4474 [ + - ]: 14674 : .bind(1, buildid)
4475 [ + - ]: 14673 : .bind(2, fileid1)
4476 [ + - ]: 14674 : .bind(3, fileid2)
4477 [ + - ]: 14674 : .bind(4, sfs.st_mtime)
4478 [ + - ]: 14673 : .step_ok_done();
4479 : :
4480 [ + - + - : 29348 : inc_metric("found_sourcerefs_total","source","files");
+ - + - -
+ - + + -
- - - - -
- ]
4481 [ + - ]: 29348 : }
4482 : : }
4483 : :
4484 : 660 : ps_scan_done
4485 [ + - ]: 660 : .reset()
4486 [ + - ]: 660 : .bind(1, fileid)
4487 [ + - ]: 660 : .bind(2, st.st_mtime)
4488 [ + - ]: 660 : .bind(3, st.st_size)
4489 [ + - ]: 660 : .step_ok_done();
4490 : :
4491 [ + - ]: 660 : if (verbose > 2)
4492 [ + - + - ]: 1980 : obatched(clog) << "recorded buildid=" << buildid << " file=" << rps
4493 [ + - + - : 660 : << " mtime=" << st.st_mtime << " atype="
+ - + - ]
4494 : : << (executable_p ? "E" : "")
4495 [ + - + + : 1836 : << (debuginfo_p ? "D" : "") << endl;
+ - + + +
- + - ]
4496 [ + + ]: 756 : }
4497 : :
4498 : :
4499 : :
4500 : :
4501 : :
4502 : : // Analyze given archive file of given age; record buildids / exec/debuginfo-ness of its
4503 : : // constituent files with given upsert statements.
4504 : : static void
4505 : 396 : archive_classify (const string& rps, string& archive_extension, int64_t archiveid,
4506 : : sqlite_ps& ps_upsert_buildids, sqlite_ps& ps_upsert_fileparts, sqlite_ps& ps_upsert_file,
4507 : : sqlite_ps& ps_lookup_file,
4508 : : sqlite_ps& ps_upsert_de, sqlite_ps& ps_upsert_sref, sqlite_ps& ps_upsert_sdef,
4509 : : sqlite_ps& ps_upsert_seekable,
4510 : : time_t mtime,
4511 : : unsigned& fts_executable, unsigned& fts_debuginfo, unsigned& fts_sref, unsigned& fts_sdef,
4512 : : bool& fts_sref_complete_p)
4513 : : {
4514 : 396 : string archive_decoder = "/dev/null";
4515 [ + + ]: 1024 : for (auto&& arch : scan_archives)
4516 [ + + ]: 628 : if (string_endswith(rps, arch.first))
4517 : : {
4518 [ + - ]: 396 : archive_extension = arch.first;
4519 [ + - ]: 1024 : archive_decoder = arch.second;
4520 : : }
4521 : :
4522 : 396 : FILE* fp;
4523 : 396 : defer_dtor<FILE*,int>::dtor_fn dfn;
4524 [ + + ]: 396 : if (archive_decoder != "cat")
4525 : : {
4526 [ + - + - : 80 : string popen_cmd = archive_decoder + " " + shell_escape(rps);
+ - - + -
- - - ]
4527 [ + - ]: 40 : fp = popen (popen_cmd.c_str(), "r"); // "e" O_CLOEXEC?
4528 : 40 : dfn = pclose;
4529 [ - + ]: 40 : if (fp == NULL)
4530 [ # # # # : 0 : throw libc_exception (errno, string("popen ") + popen_cmd);
# # # # ]
4531 : 40 : }
4532 : : else
4533 : : {
4534 [ + - ]: 356 : fp = fopen (rps.c_str(), "r");
4535 : 356 : dfn = fclose;
4536 [ - + ]: 356 : if (fp == NULL)
4537 [ # # # # : 0 : throw libc_exception (errno, string("fopen ") + rps);
# # # # ]
4538 : : }
4539 : 396 : defer_dtor<FILE*,int> fp_closer (fp, dfn);
4540 : :
4541 : 396 : struct archive *a;
4542 [ + - ]: 396 : a = archive_read_new();
4543 [ - + ]: 396 : if (a == NULL)
4544 [ # # # # ]: 0 : throw archive_exception("cannot create archive reader");
4545 : 396 : defer_dtor<struct archive*,int> archive_closer (a, archive_read_free);
4546 : :
4547 [ + - ]: 396 : int rc = archive_read_support_format_all(a);
4548 [ - + ]: 396 : if (rc != ARCHIVE_OK)
4549 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all formats");
4550 [ + - ]: 396 : rc = archive_read_support_filter_all(a);
4551 [ - + ]: 396 : if (rc != ARCHIVE_OK)
4552 [ # # # # ]: 0 : throw archive_exception(a, "cannot select all filters");
4553 : :
4554 [ + - ]: 396 : rc = archive_read_open_FILE (a, fp);
4555 [ - + ]: 396 : if (rc != ARCHIVE_OK)
4556 : : {
4557 [ # # # # : 0 : obatched(clog) << "cannot open archive from pipe " << rps << endl;
# # ]
4558 [ # # # # ]: 0 : throw archive_exception(a, "cannot open archive from pipe");
4559 : : }
4560 : :
4561 [ + + ]: 396 : if (verbose > 3)
4562 [ + - + - : 712 : obatched(clog) << "libarchive scanning " << rps << " id " << archiveid << endl;
+ - + - +
- ]
4563 : :
4564 [ + - ]: 396 : bool seekable = is_seekable_archive (rps, a);
4565 [ + - + + ]: 396 : if (verbose> 2 && seekable)
4566 [ + - + - : 64 : obatched(clog) << rps << " is seekable" << endl;
+ - ]
4567 : :
4568 : : bool any_exceptions = false;
4569 : 3774 : while(1) // parse archive entries
4570 : : {
4571 [ + - ]: 3774 : if (interrupted)
4572 : : break;
4573 : :
4574 : 3774 : try
4575 : : {
4576 : 3774 : struct archive_entry *e;
4577 [ + - ]: 3774 : rc = archive_read_next_header (a, &e);
4578 [ + + ]: 3774 : if (rc != ARCHIVE_OK)
4579 : : break;
4580 : :
4581 [ + - + + ]: 3378 : if (! S_ISREG(archive_entry_mode (e))) // skip non-files completely
4582 : 2338 : continue;
4583 : :
4584 [ + - ]: 1040 : string fn = canonicalized_archive_entry_pathname (e);
4585 : :
4586 [ + + ]: 1040 : if (verbose > 3)
4587 [ + - + - : 1756 : obatched(clog) << "libarchive checking " << fn << endl;
+ - - - ]
4588 : :
4589 [ + - ]: 1040 : int64_t seekable_size = archive_entry_size (e);
4590 [ + - ]: 1040 : int64_t seekable_offset = archive_filter_bytes (a, 0);
4591 [ + - ]: 1040 : time_t seekable_mtime = archive_entry_mtime (e);
4592 : :
4593 : : // extract this file to a temporary file
4594 : 1040 : char* tmppath = NULL;
4595 : 1040 : rc = asprintf (&tmppath, "%s/debuginfod-classify.XXXXXX", tmpdir.c_str());
4596 [ - + ]: 1040 : if (rc < 0)
4597 [ # # # # ]: 0 : throw libc_exception (ENOMEM, "cannot allocate tmppath");
4598 : 1040 : defer_dtor<void*,void> tmmpath_freer (tmppath, free);
4599 [ + - ]: 1040 : int fd = mkstemp (tmppath);
4600 [ - + ]: 1040 : if (fd < 0)
4601 [ # # # # ]: 0 : throw libc_exception (errno, "cannot create temporary file");
4602 : 1040 : unlink (tmppath); // unlink now so OS will release the file as soon as we close the fd
4603 : 1040 : defer_dtor<int,int> minifd_closer (fd, close);
4604 : :
4605 [ + - ]: 1040 : rc = archive_read_data_into_fd (a, fd);
4606 [ - + ]: 1040 : if (rc != ARCHIVE_OK) {
4607 [ # # ]: 0 : close (fd);
4608 [ # # # # ]: 0 : throw archive_exception(a, "cannot extract file");
4609 : : }
4610 : :
4611 : : // finally ... time to run elf_classify on this bad boy and update the database
4612 : 1040 : bool executable_p = false, debuginfo_p = false;
4613 [ + - ]: 1040 : string buildid;
4614 [ + - ]: 1040 : set<string> sourcefiles;
4615 [ + - ]: 1040 : elf_classify (fd, executable_p, debuginfo_p, buildid, sourcefiles);
4616 : : // NB: might throw
4617 : :
4618 [ + + ]: 1040 : if (buildid != "") // intern buildid
4619 : : {
4620 : 666 : ps_upsert_buildids
4621 [ + - ]: 666 : .reset()
4622 [ + - ]: 666 : .bind(1, buildid)
4623 [ + - ]: 666 : .step_ok_done();
4624 : : }
4625 : :
4626 [ + - ]: 1040 : int64_t fileid = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, fn);
4627 : :
4628 [ + + ]: 1040 : if (sourcefiles.size() > 0) // sref records needed
4629 : : {
4630 : : // NB: we intern each source file once. Once raw, as it
4631 : : // appears in the DWARF file list coming back from
4632 : : // elf_classify() - because it'll end up in the
4633 : : // _norm.artifactsrc column. We don't also put another
4634 : : // version with a '.' at the front, even though that's
4635 : : // how rpm/cpio packs names, because we hide that from
4636 : : // the database for storage efficiency.
4637 : :
4638 [ + + ]: 802 : for (auto&& s : sourcefiles)
4639 : : {
4640 [ + + ]: 498 : if (s == "")
4641 : : {
4642 : 20 : fts_sref_complete_p = false;
4643 : 20 : continue;
4644 : : }
4645 : :
4646 : : // PR25548: store canonicalized source path
4647 : 478 : const string& dwarfsrc = s;
4648 [ + - ]: 478 : string dwarfsrc_canon = canon_pathname (dwarfsrc);
4649 [ + + ]: 478 : if (dwarfsrc_canon != dwarfsrc)
4650 : : {
4651 [ + - ]: 28 : if (verbose > 3)
4652 [ + - + - : 56 : obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
+ - + - +
- - - ]
4653 : : }
4654 : :
4655 [ + - ]: 478 : int64_t srcfileid = register_file_name(ps_upsert_fileparts, ps_upsert_file, ps_lookup_file,
4656 : : dwarfsrc_canon);
4657 : :
4658 : 478 : ps_upsert_sref
4659 [ + - ]: 478 : .reset()
4660 [ + - ]: 478 : .bind(1, buildid)
4661 [ + - ]: 478 : .bind(2, srcfileid)
4662 [ + - ]: 478 : .step_ok_done();
4663 : :
4664 [ + - ]: 478 : fts_sref ++;
4665 : 478 : }
4666 : : }
4667 : :
4668 [ + + ]: 1040 : if (executable_p)
4669 : 302 : fts_executable ++;
4670 [ + + ]: 1040 : if (debuginfo_p)
4671 : 366 : fts_debuginfo ++;
4672 : :
4673 [ + + + + ]: 1040 : if (executable_p || debuginfo_p)
4674 : : {
4675 : 666 : ps_upsert_de
4676 [ + - ]: 666 : .reset()
4677 [ + - ]: 666 : .bind(1, buildid)
4678 [ + + + - ]: 966 : .bind(2, debuginfo_p ? 1 : 0)
4679 [ + + + - ]: 1030 : .bind(3, executable_p ? 1 : 0)
4680 [ + - ]: 666 : .bind(4, archiveid)
4681 [ + - ]: 666 : .bind(5, mtime)
4682 [ + - ]: 666 : .bind(6, fileid)
4683 [ + - ]: 666 : .step_ok_done();
4684 [ + + ]: 666 : if (seekable)
4685 : 336 : ps_upsert_seekable
4686 [ + - ]: 336 : .reset()
4687 [ + - ]: 336 : .bind(1, archiveid)
4688 [ + - ]: 336 : .bind(2, fileid)
4689 [ + - ]: 336 : .bind(3, seekable_size)
4690 [ + - ]: 336 : .bind(4, seekable_offset)
4691 [ + - ]: 336 : .bind(5, seekable_mtime)
4692 [ + - ]: 336 : .step_ok_done();
4693 : : }
4694 : : else // potential source - sdef record
4695 : : {
4696 : 374 : fts_sdef ++;
4697 : 374 : ps_upsert_sdef
4698 [ + - ]: 374 : .reset()
4699 [ + - ]: 374 : .bind(1, archiveid)
4700 [ + - ]: 374 : .bind(2, mtime)
4701 [ + - ]: 374 : .bind(3, fileid)
4702 [ + - ]: 374 : .step_ok_done();
4703 : : }
4704 : :
4705 [ + - + + : 1040 : if ((verbose > 2) && (executable_p || debuginfo_p))
+ + ]
4706 : : {
4707 [ + - ]: 666 : obatched ob(clog);
4708 [ + - + - ]: 666 : auto& o = ob << "recorded buildid=" << buildid << " rpm=" << rps << " file=" << fn
4709 [ + - + - : 666 : << " mtime=" << mtime << " atype="
+ - + - +
- + - ]
4710 : : << (executable_p ? "E" : "")
4711 : : << (debuginfo_p ? "D" : "")
4712 [ + - + + : 1330 : << " sourcefiles=" << sourcefiles.size();
+ - + + +
- + - +
- ]
4713 [ + + ]: 666 : if (seekable)
4714 [ + - + - ]: 336 : o << " seekable size=" << seekable_size
4715 [ + - + - ]: 336 : << " offset=" << seekable_offset
4716 [ + - + - ]: 336 : << " mtime=" << seekable_mtime;
4717 [ + - ]: 666 : o << endl;
4718 : 666 : }
4719 : :
4720 [ + + + + ]: 2584 : }
4721 [ - - ]: 0 : catch (const reportable_exception& e)
4722 : : {
4723 [ - - ]: 0 : e.report(clog);
4724 : 0 : any_exceptions = true;
4725 : : // NB: but we allow the libarchive iteration to continue, in
4726 : : // case we can still gather some useful information. That
4727 : : // would allow some webapi queries to work, until later when
4728 : : // this archive is rescanned. (Its vitals won't go into the
4729 : : // _file_mtime_scanned table until after a successful scan.)
4730 : 0 : }
4731 : : }
4732 : :
4733 [ - + ]: 396 : if (any_exceptions)
4734 [ # # # # ]: 0 : throw reportable_exception("exceptions encountered during archive scan");
4735 [ + + ]: 420 : }
4736 : :
4737 : :
4738 : :
4739 : : // scan for archive files such as .rpm
4740 : : static void
4741 : 762 : scan_archive_file (const string& rps, const stat_t& st,
4742 : : sqlite_ps& ps_upsert_buildids,
4743 : : sqlite_ps& ps_upsert_fileparts,
4744 : : sqlite_ps& ps_upsert_file,
4745 : : sqlite_ps& ps_lookup_file,
4746 : : sqlite_ps& ps_upsert_de,
4747 : : sqlite_ps& ps_upsert_sref,
4748 : : sqlite_ps& ps_upsert_sdef,
4749 : : sqlite_ps& ps_upsert_seekable,
4750 : : sqlite_ps& ps_query,
4751 : : sqlite_ps& ps_scan_done,
4752 : : unsigned& fts_cached,
4753 : : unsigned& fts_executable,
4754 : : unsigned& fts_debuginfo,
4755 : : unsigned& fts_sref,
4756 : : unsigned& fts_sdef)
4757 : : {
4758 : : // intern the archive file name
4759 : 762 : int64_t archiveid = register_file_name (ps_upsert_fileparts, ps_upsert_file, ps_lookup_file, rps);
4760 : :
4761 : : /* See if we know of it already. */
4762 : 762 : int rc = ps_query
4763 : 762 : .reset()
4764 : 762 : .bind(1, archiveid)
4765 : 762 : .bind(2, st.st_mtime)
4766 : 762 : .step();
4767 : 762 : ps_query.reset();
4768 [ + + ]: 762 : if (rc == SQLITE_ROW) // i.e., a result, as opposed to DONE (no results)
4769 : : // no need to recheck a file/version we already know
4770 : : // specifically, no need to parse this archive again, since we already have
4771 : : // it as a D or E or S record,
4772 : : // (so is stored with buildid=NULL)
4773 : : {
4774 : 366 : fts_cached ++;
4775 : 366 : return;
4776 : : }
4777 : :
4778 : : // extract the archive contents
4779 : 396 : unsigned my_fts_executable = 0, my_fts_debuginfo = 0, my_fts_sref = 0, my_fts_sdef = 0;
4780 : 396 : bool my_fts_sref_complete_p = true;
4781 : 396 : bool any_exceptions = false;
4782 : 396 : try
4783 : : {
4784 [ + - ]: 396 : string archive_extension;
4785 : 396 : archive_classify (rps, archive_extension, archiveid,
4786 : : ps_upsert_buildids, ps_upsert_fileparts, ps_upsert_file, ps_lookup_file,
4787 : : ps_upsert_de, ps_upsert_sref, ps_upsert_sdef, ps_upsert_seekable, // dalt
4788 [ + - ]: 396 : st.st_mtime,
4789 : : my_fts_executable, my_fts_debuginfo, my_fts_sref, my_fts_sdef,
4790 : : my_fts_sref_complete_p);
4791 [ + - + - : 792 : add_metric ("scanned_bytes_total","source",archive_extension + " archive",
+ - - + +
+ - - -
- ]
4792 [ + - ]: 396 : st.st_size);
4793 [ + - + - : 792 : inc_metric ("scanned_files_total","source",archive_extension + " archive");
+ - + - -
+ + + - -
- - ]
4794 [ + - + - : 792 : add_metric("found_debuginfo_total","source",archive_extension + " archive",
+ - + - -
+ + + - -
- - ]
4795 : : my_fts_debuginfo);
4796 [ + - + - : 792 : add_metric("found_executable_total","source",archive_extension + " archive",
+ - + - -
+ + + - -
- - ]
4797 : : my_fts_executable);
4798 [ + - + - : 808 : add_metric("found_sourcerefs_total","source",archive_extension + " archive",
+ - + - -
+ + + - +
- - - - -
- ]
4799 : : my_fts_sref);
4800 : 396 : }
4801 [ - - ]: 0 : catch (const reportable_exception& e)
4802 : : {
4803 [ - - ]: 0 : e.report(clog);
4804 : 0 : any_except |