{
  description = "Haskell dev MicroVM";

  inputs.impermanence.url = "github:nix-community/impermanence";
  inputs.microvm.url = "github:astro/microvm.nix";
  inputs.microvm.inputs.nixpkgs.follows = "nixpkgs";

  outputs = { self, impermanence, microvm, nixpkgs }:
    let
      persistencePath = "/persistent";
      system = "x86_64-linux";
      user = "thk";
      vmname = "haskell";
      nixosConfiguration = nixpkgs.lib.nixosSystem {
        inherit system;
        modules = [
          microvm.nixosModules.microvm
          impermanence.nixosModules.impermanence
          ({ pkgs, ... }: {
            environment.persistence.${persistencePath} = {
              hideMounts = true;
              users.${user} = {
                directories = [
                  "git" ".stack"
                ];
              };
            };
            environment.sessionVariables = {
              TERM = "screen-256color";
            };
            environment.systemPackages = with pkgs; [
              ghc
              git
              (haskell-language-server.override { supportedGhcVersions = [ "94" ]; })
              htop
              stack
              tmux
              tree
              vcsh
              zsh
            ];
            fileSystems.${persistencePath}.neededForBoot = nixpkgs.lib.mkForce true;
            microvm = {
              forwardPorts = [
                { from = "host"; host.port = 2222; guest.port = 22; }
                { from = "guest"; host.port = 5432; guest.port = 5432; } # postgresql
              ];
              hypervisor = "qemu";
              interfaces = [
                { type = "user"; id = "usernet"; mac = "00:00:00:00:00:02"; }
              ];
              mem = 4096;
              shares = [
                {
                  # use "virtiofs" for MicroVMs that are started by systemd
                  proto = "9p";
                  tag = "ro-store";
                  # a host's /nix/store will be picked up so that no
                  # squashfs/erofs will be built for it.
                  source = "/nix/store";
                  mountPoint = "/nix/.ro-store";
                }
                {
                  proto = "virtiofs";
                  tag = "persistent";
                  source = "~/.local/share/microvm/vms/${vmname}/persistent";
                  mountPoint = persistencePath;
                  socket = "/run/user/1000/microvm-${vmname}-persistent";
                }
              ];
              socket = "/run/user/1000/microvm-control.socket";
              vcpu = 3;
              volumes = [];
              writableStoreOverlay = "/nix/.rwstore";
            };
            networking.hostName = vmname;
            nix.enable = true;
            nix.nixPath = ["nixpkgs=${builtins.storePath <nixpkgs>}"];
            nix.settings = {
              extra-experimental-features = ["nix-command" "flakes"];
              trusted-users = [user];
            };
            security.sudo = {
              enable = true;
              wheelNeedsPassword = false;
            };
            services.getty.autologinUser = user;
            services.openssh = {
              enable = true;
            };
            system.stateVersion = "24.11";
            systemd.services.loadnixdb = {
              description = "import host's nix database";
              path = [pkgs.nix];
              wantedBy = ["multi-user.target"];
              requires = ["nix-daemon.service"];
              script = "cat ${persistencePath}/nix-store-db-dump | nix-store --load-db";
            };
            time.timeZone = nixpkgs.lib.mkDefault "Europe/Berlin";
            users.users.${user} = {
              extraGroups = [ "wheel" "video" ];
              group = "user";
              isNormalUser = true;
              openssh.authorizedKeys.keys = [
                "ssh-rsa REDACTED"
              ];
              password = "";
            };
            users.users.root.password = "";
            users.groups.user = { };
          })
        ];
      };
    in {
      packages.${system}.default = nixosConfiguration.config.microvm.declaredRunner;
    };
}
I start the microVM with a templated systemd user service:
[Unit]
Description=MicroVM for Haskell development
Requires=microvm-virtiofsd-persistent@.service
After=microvm-virtiofsd-persistent@.service
AssertFileNotEmpty=%h/.local/share/microvm/vms/%i/flake/flake.nix
[Service]
Type=forking
ExecStartPre=/usr/bin/sh -c "[ /nix/var/nix/db/db.sqlite -ot %h/.local/share/microvm/nix-store-db-dump ] || nix-store --dump-db >%h/.local/share/microvm/nix-store-db-dump"
ExecStartPre=ln -f -t %h/.local/share/microvm/vms/%i/persistent/ %h/.local/share/microvm/nix-store-db-dump
ExecStartPre=-%h/.local/state/nix/profile/bin/tmux new -s microvm -d
ExecStart=%h/.local/state/nix/profile/bin/tmux new-window -t microvm: -n "%i" "exec %h/.local/state/nix/profile/bin/nix run --impure %h/.local/share/microvm/vms/%i/flake"
The above service definition creates a dump of the host's Nix store database so that it can be imported in the guest. This is necessary so that the guest can actually use what is available in /nix/store. There is an effort towards an overlaid Nix store that would be preferable to this hack.
Finally, the MicroVM is started inside a tmux session named microvm. This way I can use the VM with SSH or through the console and also access the QEMU console.
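A small usage sketch of my own: the session name microvm comes from the unit above, while the host port 2222 and the user thk are taken from the forwardPorts and user settings in the flake. Interacting with the running VM then looks roughly like this:
$ tmux attach -t microvm        # attach to the session holding the VM window
$ ssh -p 2222 thk@localhost     # or log in through the forwarded SSH port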
And for completeness the virtiofsd service:
[Unit]
Description=serve host persistent folder for dev VM
AssertPathIsDirectory=%h/.local/share/microvm/vms/%i/persistent
[Service]
ExecStart=%h/.local/state/nix/profile/bin/virtiofsd \
--socket-path=${XDG_RUNTIME_DIR}/microvm-%i-persistent \
--shared-dir=%h/.local/share/microvm/vms/%i/persistent \
--gid-map :995:%G:1: \
--uid-map :1000:%U:1:
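For illustration only: assuming the main templated unit above is installed as a user unit named microvm@.service (that name is my assumption; only microvm-virtiofsd-persistent@.service is named explicitly), a VM instance would be started like this:
$ systemctl --user daemon-reload
$ systemctl --user start microvm@haskell.service
# The Requires=/After= lines pull in the matching
# microvm-virtiofsd-persistent@haskell.service instance automatically.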
$HOME/.config/nixpkgs/config.nix:
with (import <nixpkgs> {});
{
  packageOverrides = pkgs: with pkgs; {
    thk-emacsWithPackages = (pkgs.emacsPackagesFor emacs-gtk).emacsWithPackages (
      epkgs:
        (with epkgs.elpaPackages; [
          ace-window
          company
          org
          use-package
        ]) ++ (with epkgs.melpaPackages; [
          editorconfig
          flycheck
          haskell-mode
          magit
          nix-mode
          paredit
          rainbow-delimiters
          treemacs
          visual-fill-column
          yasnippet-snippets
        ]) ++ [ # From main packages set
        ]
    );
    userPackages = buildEnv {
      extraOutputsToInstall = [ "doc" "info" "man" ];
      name = "user-packages";
      paths = [
        ghc
        git
        (pkgs.haskell-language-server.override { supportedGhcVersions = [ "94" ]; })
        nix
        stack
        thk-emacsWithPackages
        tmux
        vcsh
        virtiofsd
      ];
    };
  };
}
/etc/systemd/system/nix-daemon.service.d/override.conf:
[Service]
ExecStart=
ExecStart=@/home/thk/.local/state/nix/profile/bin/nix-daemon nix-daemon --daemon
nixpkgs in the path. This is not a config file for nix the package manager but for the nix package collection (nixpkgs); see the nixpkgs manual.

Series: Discworld #37
Publisher: Harper
Copyright: October 2009
Printing: November 2014
ISBN: 0-06-233500-6
Format: Mass market
Pages: 517
Glenda enjoyed her job. She didn't have a career; they were for people who could not hold down jobs.

She is the kind of person who knows where she fits in the world and likes what she does and is happy to stay there until she decides something isn't right, and then she changes the world through the power of common sense morality, righteous indignation, and sheer stubborn persistence. Discworld is full of complex and subtle characters fencing with each other, but there are few things I have enjoyed more than Glenda being a determinedly good person. Vetinari of course recognizes and respects (and uses) that inner core immediately. Unfortunately, as great as the setup and characters are, Unseen Academicals falls apart a bit at the end. I was eagerly reading the story, wondering what Pratchett was going to weave out of the stories of these individuals, and then it partly turned into yet another wizard book. Pratchett pulled another of his deus ex machina tricks for the climax in a way that I found unsatisfying and contrary to the tone of the rest of the story, and while the characters do get reasonable endings, it lacked the oomph I was hoping for. Rincewind is as determinedly one-note as ever, the wizards do all the standard wizard things, and the plot just isn't that interesting. I liked Mr. Nutt a great deal in the first part of the book, and I wish he could have kept that edge of enigmatic competence and unflappableness. Pratchett wanted to tell a different story that involved more angst and self-doubt, and while I appreciate that story, I found it less engaging and a bit more melodramatic than I was hoping for. Mr. Nutt's reactions in the last half of the book were believable and fit his background, but that was part of the problem: he slotted back into an archetype that I thought Pratchett was going to twist and upend. Mr. Nutt does, at least, get a fantastic closing line, and as usual there are a lot of great asides and quotes along the way, including possibly the sharpest and most biting Vetinari speech of the entire series.
The Patrician took a sip of his beer. "I have told this to few people, gentlemen, and I suspect never will again, but one day when I was a young boy on holiday in Uberwald I was walking along the bank of a stream when I saw a mother otter with her cubs. A very endearing sight, I'm sure you will agree, and even as I watched, the mother otter dived into the water and came up with a plump salmon, which she subdued and dragged on to a half-submerged log. As she ate it, while of course it was still alive, the body split and I remember to this day the sweet pinkness of its roes as they spilled out, much to the delight of the baby otters who scrambled over themselves to feed on the delicacy. One of nature's wonders, gentlemen: mother and children dining on mother and children. And that's when I first learned about evil. It is built into the very nature of the universe. Every world spins in pain. If there is any kind of supreme being, I told myself, it is up to all of us to become his moral superior."

My dissatisfaction with the ending prevents Unseen Academicals from rising to the level of Night Watch, and it's a bit more uneven than the best books of the series. Still, though, this is great stuff; recommended to anyone who is reading the series. Followed in publication order by I Shall Wear Midnight.

Rating: 8 out of 10
Courtesy of my CRANberries, there is a diffstat report relative to the previous release. More detailed information is on the RcppArmadillo page. Questions, comments etc. should go to the rcpp-devel mailing list off the Rcpp R-Forge page. If you like this or other open-source work I do, you can sponsor me at GitHub.

Changes in RcppArmadillo version 0.12.8.2.1 (2024-04-15)
- One-char bug fix release commenting out one test that upsets reticulate when accessing a scipy sparse matrix
This post by Dirk Eddelbuettel originated on his Thinking inside the box blog. Please report excessive re-aggregation in third-party for-profit settings.
git clone https://gitlab.com/gsasl/libntlm.git
cd libntlm
git checkout v1.8
./bootstrap
./configure
make distcheck
gpg -b libntlm-1.8.tar.gz
The generated files libntlm-1.8.tar.gz and libntlm-1.8.tar.gz.sig are published, and users download and use them. This is how the GNU project has been doing releases since the late 1980s. That is a testament to how successful this pattern has been! These tarballs contain source code and some generated files, typically shell scripts generated by autoconf, makefile templates generated by automake, and documentation in formats like Info, HTML, or PDF. Rarely do they contain binary object code, but historically that happened.
The XZUtils incident illustrates that tarballs containing files that are not included in the git archive offer an opportunity to disguise malicious backdoors. I blogged earlier about how to mitigate this risk by using signed minimal source-only tarballs.
The risk of hiding malware is not the only motivation to publish signed minimal source-only tarballs. With pre-generated content in tarballs, there is a risk that GNU/Linux distributions such as Trisquel, Guix, Debian/Ubuntu or Fedora ship generated files coming from the tarball into the binary *.deb or *.rpm package file. Typically the person packaging the upstream project never realized that some installed artifacts were not rebuilt through a typical autoreconf -fi && ./configure && make install sequence, and never wrote the code to rebuild everything. This can also happen if the build rules are written but are buggy, shipping the old artifact. When a security problem is found, this can lead to time-consuming situations, as it may be that patching the relevant source code and rebuilding the package is not sufficient: the vulnerable generated object from the tarball would be shipped into the binary package instead of a rebuilt artifact. For architecture-specific binaries this rarely happens, since object code is usually not included in tarballs (although for 10+ years I shipped the binary Java JAR file in the GNU Libidn release tarball, until I stopped shipping it). For interpreted languages, and especially for generated content such as HTML, PDF and shell scripts, this happens more than you would like.
Publishing minimal source-only tarballs enables easier auditing of a project's code, avoiding the need to read through all generated files looking for malicious content. I have taken care to generate the source-only minimal tarball using git-archive. This is the same format that GitLab, GitHub etc. offer for the automated download links on git tags. The minimal source-only tarballs can thus serve as a way to audit GitLab and GitHub download material! Consider if/when a hosting site like GitLab or GitHub has a security incident that causes generated tarballs to include a backdoor that is not present in the git repository. If people rely on the tag download artifact without verifying the maintainer's PGP signature using GnuPG, this can lead to backdoor scenarios similar to the one we had for XZUtils, but originating with the hosting provider instead of the release manager. This is even more concerning, since such an attack can be mounted only against selected IP addresses that you want to target rather than against everyone, thereby making it harder to discover.
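As a hedged sketch of that verification step (the maintainer key itself has to be obtained and checked out of band; the file names are those of the release discussed here):
# Import the maintainer's public key after verifying its fingerprint out of band.
gpg --import maintainer-key.asc
# Check the detached signatures against the downloaded tarballs.
gpg --verify libntlm-1.8-src.tar.gz.sig libntlm-1.8-src.tar.gz
gpg --verify libntlm-1.8.tar.gz.sig libntlm-1.8.tar.gz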
With all that discussion and rationale out of the way, let's return to the release process. I have added another step here:
make srcdist
gpg -b libntlm-1.8-src.tar.gz
Now the release is ready. I publish these four files in Libntlm's Savannah Download area, but they can be uploaded to a GitLab/GitHub release area as well. These are the SHA256 checksums I got after building the tarballs on my Trisquel 11 aramo laptop:
91de864224913b9493c7a6cec2890e6eded3610d34c3d983132823de348ec2ca libntlm-1.8-src.tar.gz
ce6569a47a21173ba69c990965f73eb82d9a093eb871f935ab64ee13df47fda1 libntlm-1.8.tar.gz
So how can you reproduce my artifacts? Here is how to reproduce them in an Ubuntu 22.04 container:
podman run -it --rm ubuntu:22.04
apt-get update
apt-get install -y --no-install-recommends autoconf automake libtool make git ca-certificates
git clone https://gitlab.com/gsasl/libntlm.git
cd libntlm
git checkout v1.8
./bootstrap
./configure
make dist srcdist
sha256sum libntlm-*.tar.gz
You should see the exact same SHA256 checksum values. Hooray!
This works because Trisquel 11 and Ubuntu 22.04 use the same version of git, autoconf, automake, and libtool. These tools do not guarantee the same output content for all versions, similar to how GNU GCC does not generate the same binary output for all versions. So there is still some delicate version pairing needed.
Ideally, the artifacts should be possible to reproduce from the release artifacts themselves, and not only directly from git. It is possible to reproduce the full tarball in an AlmaLinux 8 container (replace almalinux:8 with rockylinux:8 if you prefer RockyLinux):
podman run -it --rm almalinux:8
dnf update -y
dnf install -y make wget gcc
wget https://download.savannah.nongnu.org/releases/libntlm/libntlm-1.8.tar.gz
tar xfa libntlm-1.8.tar.gz
cd libntlm-1.8
./configure
make dist
sha256sum libntlm-1.8.tar.gz
The source-only minimal tarball can be regenerated on Debian 11:
podman run -it --rm debian:11
apt-get update
apt-get install -y --no-install-recommends make git ca-certificates
git clone https://gitlab.com/gsasl/libntlm.git
cd libntlm
git checkout v1.8
make -f cfg.mk srcdist
sha256sum libntlm-1.8-src.tar.gz
As the magnum opus or chef-d'œuvre, let's recreate the full tarball directly from the minimal source-only tarball on Trisquel 11 (replace docker.io/kpengboy/trisquel:11.0 with ubuntu:22.04 if you prefer).
podman run -it --rm docker.io/kpengboy/trisquel:11.0
apt-get update
apt-get install -y --no-install-recommends autoconf automake libtool make wget git ca-certificates
wget https://download.savannah.nongnu.org/releases/libntlm/libntlm-1.8-src.tar.gz
tar xfa libntlm-1.8-src.tar.gz
cd libntlm-v1.8
./bootstrap
./configure
make dist
sha256sum libntlm-1.8.tar.gz
Yay! You should now have great confidence that the release artifacts correspond to what's in version control and also to what the maintainer intended to release. Your remaining job is to audit the source code for vulnerabilities, including the source code of the dependencies used in the build. You no longer have to worry about auditing the release artifacts.
I find it somewhat amusing that the build infrastructure for Libntlm is now in a significantly better place than the code itself. Libntlm is written in old C style with plenty of string manipulation and uses broken cryptographic algorithms such as MD4 and single-DES. Remember folks: solving supply chain security issues has no bearing on what kind of code you eventually run. A clean gun can still shoot you in the foot.
Side note on naming: GitLab exports tarballs with pathnames libntlm-v1.8/ (i.e., PROJECT-TAG/) and I've adopted the same pathnames, which means my libntlm-1.8-src.tar.gz tarballs are bit-by-bit identical to GitLab's exports, and you can verify this with tools like diffoscope. GitLab names the tarball libntlm-v1.8.tar.gz (i.e., PROJECT-TAG.ARCHIVE), which I find too similar to the libntlm-1.8.tar.gz that we also publish. GitHub uses the same git archive style, but unfortunately they have logic that removes the v in the pathname, so you will get a tarball with pathname libntlm-1.8/ instead of the libntlm-v1.8/ that GitLab and I use. The content of the tarball is bit-by-bit identical, but the pathname and archive differ. Codeberg (running Forgejo) uses another approach: the tarball is called libntlm-v1.8.tar.gz (after the tag) just like GitLab, but the pathname inside the archive is libntlm/; otherwise the produced archive is bit-by-bit identical, including timestamps. Savannah's CGIT interface uses archive name libntlm-1.8.tar.gz with pathname libntlm-1.8/, but otherwise the file content is identical. Savannah's GitWeb interface provides snapshot links that are named after the git commit (e.g., libntlm-a812c2ca.tar.gz with libntlm-a812c2ca/) and I cannot find any tag-based download links at all. Overall, we are so close to getting the SHA256 checksums to match, but fail on the pathname within the archive. I've chosen to be compatible with GitLab regarding the content of tarballs but not on archive naming. From a simplicity point of view, it would be nice if everyone used PROJECT-TAG.ARCHIVE for the archive filename and PROJECT-TAG/ for the pathname within the archive. This aspect will probably need more discussion.
Side note on git archive output: it seems different versions of git archive produce different results for the same repository. The version of git in Debian 11, Trisquel 11 and Ubuntu 22.04 behaves the same. The version of git in Debian 12, AlmaLinux/RockyLinux 8/9, Alpine, ArchLinux, macOS homebrew, and the upcoming Ubuntu 24.04 behaves in another way. Hopefully this will not change that often, but it would invalidate reproducibility of these tarballs in the future, forcing you to use an old git release to reproduce the source-only tarball. Alas, GitLab and most other sites appear to be using modern git, so the download tarballs from them would not match my tarballs, even though the content would.
Side note on ChangeLog: ChangeLog files were traditionally manually curated files with version history for a package. In recent years, several projects moved to dynamically generating them from the git history (using tools like git2cl or gitlog-to-changelog). This has consequences for reproducibility of tarballs: you need to have the entire git history available! The gitlog-to-changelog tool also outputs different results depending on the time zone of the person using it, which arguably is a simple bug that can be fixed. However, this entire approach is incompatible with rebuilding the full tarball from the minimal source-only tarball. It seems Libntlm's ChangeLog file died on the surgery table here.
So how would a distribution build these minimal source-only tarballs? I happen to help on the libntlm package in Debian. It has historically used the generated tarballs as the source code to build from. This means that code coming from gnulib is vendored in the tarball. When a security problem is discovered in gnulib code, the security team needs to patch all packages that include that vendored code and rebuild them, instead of merely patching the gnulib package and rebuilding all packages that rely on that particular code. To change this, the Debian libntlm package needs to Build-Depends on Debian's gnulib package. But there was one problem: similar to most projects that use gnulib, Libntlm depends on a particular git commit of gnulib, and Debian only ships one commit. There is no coordination about which commit to use. I have adopted gnulib in Debian, and add a git bundle to the *_all.deb binary package so that projects that rely on gnulib can pick whatever commit they need. This allows a no-network GNULIB_URL and GNULIB_REVISION approach when running Libntlm's ./bootstrap with the Debian gnulib package installed. Otherwise libntlm would pick up whatever latest version of gnulib Debian happened to have in the gnulib package, which is not what the Libntlm maintainer intended to be used, and can lead to all sorts of version mismatches (and consequently security problems) over time. Libntlm in Debian is developed and tested on Salsa and there is continuous integration testing of it as well, thanks to the Salsa CI team.
Side note on git bundles: unfortunately there appears to be no reproducible way to export a git repository into one or more files. So one unfortunate consequence of all this work is that the gnulib *.orig.tar.gz tarball in Debian is not reproducible any more. I have tried to get git bundles to be reproducible but never got it to work; see my notes in gnulib's debian/README.source on this aspect. Of course, source tarball reproducibility has nothing to do with binary reproducibility of gnulib in Debian itself, fortunately.
One open question is how to deal with the increased build dependencies that are triggered by this approach. Some people are surprised by this, but I don't see how to get around it: if you depend on source code for tools in another package to build your package, it is a bad idea to hide that dependency. We've done it for a long time through vendored code in non-minimal tarballs. Libntlm isn't the most critical project from a bootstrapping perspective, so adding git and gnulib as Build-Depends to it will probably be fine. However, consider if this pattern was used for other packages that use gnulib, such as coreutils, gzip, tar, bison etc. (all are using gnulib): then they would all Build-Depends on git and gnulib. Cross-building those packages for a new architecture will therefore require git on that architecture first, which gets circular quickly. The dependency on gnulib is real so I don't see that going away, and gnulib is an Architecture: all package. However, the dependency on git is merely a consequence of how the Debian gnulib package chose to make all gnulib git commits available to projects: through a git bundle. There are other ways to do this that don't require the git tool to extract the necessary files, but none that I found practical; ideas welcome!
Finally some brief notes on how this was implemented. Enabling bootstrappable source-only minimal tarballs via gnulib's ./bootstrap is achieved by using the GNULIB_REVISION mechanism, locking down the gnulib commit used. I have always disliked git submodules because they add extra steps and have complicated interactions with CI/CD. The reason why I gave up git submodules now is that the particular commit to use is not recorded in the git archive output when git submodules are used. So the particular gnulib commit has to be mentioned explicitly in some source code that goes into the git archive tarball. Colin Watson added the GNULIB_REVISION approach to ./bootstrap back in 2018, and now it no longer made sense to continue to use a gnulib git submodule. One alternative is to use ./bootstrap with --gnulib-srcdir or --gnulib-refdir if there is some practical problem with the GNULIB_URL towards a git bundle or the GNULIB_REVISION in bootstrap.conf.
The srcdist make rule is simple:
git archive --prefix=libntlm-v1.8/ -o libntlm-v1.8.tar.gz HEAD
Making the make dist generated tarball reproducible can be more complicated; however, for Libntlm it was sufficient to make sure the modification times of all files were set deterministically to the timestamp of the last commit in the git repository. Interestingly, there seem to be a couple of different ways to accomplish this. Guix doesn't support minimal source-only tarballs but relies on a .tarball-timestamp file inside the tarball. Paul Eggert explained what TZDB is using some time ago. The approach I'm using now is fairly similar to the one I suggested over a year ago. If there are problems because all files in the tarball now use the same modification time, there is a solution by Bruno Haible that could be implemented.
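To illustrate the idea (this is a generic GNU tar sketch under my own assumptions, not necessarily the exact rule Libntlm uses), pinning all metadata to the last commit timestamp can look like this:
# Build a tarball whose metadata does not depend on the build machine.
LAST_COMMIT="$(git log -1 --format=%cI)"
tar --sort=name \
    --mtime="$LAST_COMMIT" \
    --owner=0 --group=0 --numeric-owner \
    -cf - libntlm-1.8/ | gzip -n > libntlm-1.8.tar.gz   # gzip -n omits its own timestamp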
Side note on git tags: some people may wonder why not verify a signed git tag instead of verifying a signed tarball of the git archive. Currently most git repositories use SHA-1 for git commit identities, but SHA-1 is not a secure hash function. While current SHA-1 attacks can be detected and mitigated, there are fundamental doubts that a git SHA-1 commit identity uniquely refers to the same content that was intended. Verifying a git tag will never offer the same assurance, since a git tag can be moved or re-signed at any time. Verifying a git commit is better, but then we need to trust SHA-1. Migrating git to SHA-256 would resolve this aspect, but most hosting sites such as GitLab and GitHub do not support this yet. There are other advantages to using signed tarballs instead of signed git commits or git tags as well; e.g., a tar.gz can be a deterministically reproducible, persistent, stable offline storage format, but .git sub-directory trees or git bundles do not offer this property.
Doing continuous testing of all this is critical to make sure things don't regress. Libntlm's pipeline definition now produces the generated libntlm-*.tar.gz tarballs and a checksum as a build artifact. Then I added the 000-reproducability job, which compares the checksums and fails on mismatches. You can read its delicate output in the job for the v1.8 release. Right now we insist that builds on Trisquel 11 match Ubuntu 22.04, that PureOS 10 builds match Debian 11 builds, that AlmaLinux 8 builds match RockyLinux 8 builds, and that AlmaLinux 9 builds match RockyLinux 9 builds. As you can see in the pipeline job output, not all platforms lead to the same tarballs, but hopefully this state can be improved over time. There is also partial reproducibility, where the full tarball is reproducible across two distributions but not the minimal tarball, or vice versa.
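Conceptually, such a comparison job boils down to something like the following sketch (artifact file names are made up; the real logic lives in Libntlm's GitLab pipeline definition):
# Each build job records its checksums as an artifact...
sha256sum libntlm-1.8.tar.gz libntlm-1.8-src.tar.gz > checksums-trisquel11.txt
# ...and the comparison job fails the pipeline on any mismatch.
diff checksums-trisquel11.txt checksums-ubuntu2204.txt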
If this way of working plays out well, I hope to implement it in other projects too.
What do you think? Happy Hacking!
build-id (more specifically, the ELF note .note.gnu.build-id) wound up in the Debian apt archive metadata. I've always thought this was super cool, and seeing as how Michael Stapelberg blogged some great pointers around the ecosystem, including the fancy new debuginfod service, and the find-dbgsym-packages helper, which uses these same headers, I don't think I'm the only one.
At work I've been using a lot of rust, specifically async rust using tokio. To try and work on my style, and to dig deeper into the how and why of the decisions made in these frameworks, I've decided to hack up a project that I've wanted to do ever since 2015: write a debug filesystem. Let's get to it.
9p, the network protocol for serving a filesystem over a network. This leads to all sorts of fun programs, like the Plan 9 ftp client being a 9p server: you mount the ftp server and access files like any other files. It's kinda like if fuse were more fully a part of how the operating system worked, but fuse is all running client-side. With 9p there's a single client, and different servers that you can connect to, which may be backed by a hard drive, remote resources over something like SFTP, FTP, HTTP, or even be purely synthetic.
The interesting (maybe sad?) part here is that 9p wound up outliving Plan 9 in terms of adoption: 9p is in all sorts of places folks don't usually expect. For instance, the Windows Subsystem for Linux uses the 9p protocol to share files between Windows and Linux. ChromeOS uses it to share files with Crostini, and qemu uses 9p (virtio-9p) to share files between guest and host. If you're noticing a pattern here, you'd be right; for some reason 9p is the go-to protocol to exchange files between hypervisor and guest. Why? I have no idea, except maybe that it is designed well, simple to implement, and makes it a lot easier to validate the data being shared and validate security boundaries. Simplicity has its value.
As a result, there's a lot of lingering 9p support kicking around. Turns out Linux can even handle mounting 9p filesystems out of the box. This means that I can deploy a filesystem to my LAN or my localhost by running a process on top of a computer that needs nothing special, and mount it over the network on an unmodified machine, unlike fuse, where you'd need client-specific software to run in order to mount the directory. For instance, let's mount a 9p filesystem running on my localhost machine, serving requests on 127.0.0.1:564 (tcp), that goes by the name mountpointname, to /mnt.
$ mount -t 9p \
    -o trans=tcp,port=564,version=9p2000.u,aname=mountpointname \
    127.0.0.1 \
    /mnt

Linux will mount away, and attach to the filesystem as the root user, and by default, attach to that mountpoint again for each local user that attempts to use it. Nifty, right? I think so. The server is able to keep track of per-user access and authorization along with the host OS.
Sticking with rust and tokio specifically, I opted to implement the whole stack myself, without third party libraries on the critical path where I could avoid it. The 9p protocol (sometimes called Styx, the original name for it) is incredibly simple. It's a series of client to server requests, which receive a server to client response. These are, respectively, T messages, which transmit a request to the server, which trigger an R message in response (Reply messages). These messages are TLV payloads with a very straightforward structure; so straightforward, in fact, that I was able to implement a working server off nothing more than a handful of man pages.

Later on, after the basics worked, I found a more complete spec page that contains more information about the unix-specific variant that I opted to use (9P2000.u rather than 9P2000) due to the level of Linux-specific support for the 9P2000.u variant over the 9P2000 protocol.
At work it's rust and tokio running i/o for an HTTP and WebRTC server. I figured I'd pick something fairly similar to write my filesystem with, since 9P can be implemented on basically anything with I/O. That means tokio tcp server bits, which construct and use a 9p server, which has an idiomatic Rusty API that partially abstracts the raw R and T messages, but not so much as to cause issues with hiding implementation possibilities. At each abstraction level, there's an escape hatch allowing someone to implement any of the layers if required. I called this framework arigato, which can be found over on docs.rs and crates.io.
/// Simplified version of the arigato File trait; this isn't actually
/// the same trait; there's some small cosmetic differences. The
/// actual trait can be found at:
///
/// https://docs.rs/arigato/latest/arigato/server/trait.File.html
trait File {
/// OpenFile is the type returned by this File via an Open call.
type OpenFile: OpenFile;
/// Return the 9p Qid for this file. A file is the same if the Qid is
/// the same. A Qid contains information about the mode of the file,
/// version of the file, and a unique 64 bit identifier.
fn qid(&self) -> Qid;
/// Construct the 9p Stat struct with metadata about a file.
async fn stat(&self) -> FileResult<Stat>;
/// Attempt to update the file metadata.
async fn wstat(&mut self, s: &Stat) -> FileResult<()>;
/// Traverse the filesystem tree.
async fn walk(&self, path: &[&str]) -> FileResult<(Option<Self>, Vec<Self>)>;
/// Request that a file's reference be removed from the file tree.
async fn unlink(&mut self) -> FileResult<()>;
/// Create a file at a specific location in the file tree.
async fn create(
&mut self,
name: &str,
perm: u16,
ty: FileType,
mode: OpenMode,
extension: &str,
) -> FileResult<Self>;
/// Open the File, returning a handle to the open file, which handles
/// file i/o. This is split into a second type since it is genuinely
/// unrelated -- and the fact that a file is Open or Closed can be
/// handled by the arigato server for us.
async fn open(&mut self, mode: OpenMode) -> FileResult<Self::OpenFile>;
}
/// Simplified version of the arigato OpenFile trait; this isn't actually
/// the same trait; there's some small cosmetic differences. The
/// actual trait can be found at:
///
/// https://docs.rs/arigato/latest/arigato/server/trait.OpenFile.html
trait OpenFile {
/// iounit to report for this file. The iounit reported is used for Read
/// or Write operations to signal, if non-zero, the maximum size that is
/// guaranteed to be transferred atomically.
fn iounit(&self) -> u32;
/// Read some number of bytes up to buf.len() from the provided
/// offset of the underlying file. The number of bytes read is
/// returned.
async fn read_at(
&mut self,
buf: &mut [u8],
offset: u64,
) -> FileResult<u32>;
/// Write some number of bytes up to buf.len() from the provided
/// offset of the underlying file. The number of bytes written
/// is returned.
fn write_at(
&mut self,
buf: &mut [u8],
offset: u64,
) -> FileResult<u32>;
}
Next we'll use arigato to implement a 9p filesystem we'll call debugfs, which will serve all the debug files shipped according to the Packages metadata from the apt archive. We'll fetch the Packages file and construct a filesystem based on the reported Build-Id entries. For those who don't know much about how an apt repo works, here's the 2-second crash course on what we're doing. The first is to fetch the Packages file, which is specific to a binary architecture (such as amd64, arm64 or riscv64). That architecture is specific to a component (such as main, contrib or non-free). That component is specific to a suite, such as stable, unstable or any of its aliases (bullseye, bookworm, etc). Let's take a look at the Packages.xz file for the unstable-debug suite, main component, for all amd64 binaries.
$ curl \
https://deb.debian.org/debian-debug/dists/unstable-debug/main/binary-amd64/Packages.xz \
  | unxz
This will return the Debian-style rfc2822-like headers, which are an export of the metadata contained inside each .deb file, which apt (or other tools that can use the apt repo format) uses to fetch information about debs. Let's take a look at the debug headers for the netlabel-tools package in unstable, which is a package named netlabel-tools-dbgsym in unstable-debug.
Package: netlabel-tools-dbgsym
Source: netlabel-tools (0.30.0-1)
Version: 0.30.0-1+b1
Installed-Size: 79
Maintainer: Paul Tagliamonte <paultag@debian.org>
Architecture: amd64
Depends: netlabel-tools (= 0.30.0-1+b1)
Description: debug symbols for netlabel-tools
Auto-Built-Package: debug-symbols
Build-Ids: e59f81f6573dadd5d95a6e4474d9388ab2777e2a
Description-md5: a0e587a0cf730c88a4010f78562e6db7
Section: debug
Priority: optional
Filename: pool/main/n/netlabel-tools/netlabel-tools-dbgsym_0.30.0-1+b1_amd64.deb
Size: 62776
SHA256: 0e9bdb087617f0350995a84fb9aa84541bc4df45c6cd717f2157aa83711d0c60
So here, we can parse the package headers in the Packages.xz file and store, for each Build-Id, the Filename where we can fetch the .deb at. Each .deb contains a number of files, but we're only really interested in the files inside the .deb located at or under /usr/lib/debug/.build-id/, which you can find in debugfs under rfc822.rs. It's crude, and very single-purpose, but I'm feeling a bit lazy.
A .deb file is a special type of .ar file, that contains (usually) three files inside: debian-binary, control.tar.xz and data.tar.xz. The core of an .ar file is a fixed-size (60 byte) entry header, followed by the specified size number of bytes.
[8 byte .ar file magic]
[60 byte entry header]
[N bytes of data]
[60 byte entry header]
[N bytes of data]
[60 byte entry header]
[N bytes of data]
...
First up was to implement a basic ar parser in ar.rs. Before we get into using it to parse a deb, as a quick diversion, let's break apart a .deb file by hand, something that is a bit of a rite of passage (or at least it used to be? I'm getting old) during the Debian nm (new member) process, to take a look at where exactly the .debug file lives inside the .deb file.
$ ar x netlabel-tools-dbgsym_0.30.0-1+b1_amd64.deb
$ ls
control.tar.xz debian-binary
data.tar.xz netlabel-tools-dbgsym_0.30.0-1+b1_amd64.deb
$ tar --list -f data.tar.xz | grep '.debug$'
./usr/lib/debug/.build-id/e5/9f81f6573dadd5d95a6e4474d9388ab2777e2a.debug
Since we know quite a bit about the structure of a .deb file, and I had to implement support from scratch anyway, I opted to implement a (very!) basic debfile parser using HTTP Range requests. HTTP Range requests, if supported by the server (denoted by an accept-ranges: bytes HTTP header in response to an HTTP HEAD request to that file), mean that we can add a header such as range: bytes=8-68 to specifically request that the returned GET body be the byte range provided (in the above case, the bytes starting from byte offset 8 until byte offset 68). This means we can fetch just the ar file entry from the .deb file until we get to the file inside the .deb we are interested in (in our case, the data.tar.xz file), at which point we can request the body of that file with a final range request. I wound up writing a struct to handle a read_at-style API surface in hrange.rs, which we can pair with ar.rs above and start to find our data in the .deb remotely without downloading and unpacking the .deb at all.
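A rough command-line equivalent of the same trick, assuming the .deb is fetched from deb.debian.org using the Filename field from the Packages stanza above (the exact URL is my assumption):
$ URL=https://deb.debian.org/debian-debug/pool/main/n/netlabel-tools/netlabel-tools-dbgsym_0.30.0-1+b1_amd64.deb
$ curl -sI "$URL" | grep -i accept-ranges                       # look for "accept-ranges: bytes"
$ curl -s -H "Range: bytes=8-67" -o entry-header.bin "$URL"     # the 60-byte ar header right after the 8-byte magic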
After we have the body of the data.tar.xz coming back through the HTTP response, we get to pipe it through an xz decompressor (this kinda sucked in Rust, since a tokio AsyncRead is not the same as an http Body response, is not the same as std::io::Read, is not the same as an async (or sync) Iterator, is not the same as what the xz2 crate expects; leading me to read blocks of data to a buffer and stuff them through the decoder by looping over the buffer for each lzma2 packet in a loop), and a tar file parser (similarly troublesome). From there we get to iterate over all entries in the tarfile, stopping when we reach our file of interest. Since we can't seek, but gdb needs to, we'll pull it out of the stream into a Cursor<Vec<u8>> in-memory and pass a handle to it back to the user.
From here on out it's a matter of gluing together a File traited struct in debugfs, and serving the filesystem over TCP using arigato. Done deal!
On seeking around an xz file: what's interesting is that xz has a great primitive to solve this specific problem (specifically, use a block size that allows you to seek to the block as close to your desired seek position just before it, only discarding at most block size - 1 bytes), but data.tar.xz files generated by dpkg appear to have a single mega-huge block for the whole file. I don't know why I would have expected any different, in retrospect. That means that this now devolves into the base case of "how do I seek around an lzma2 compressed data stream?", which is a lot more complex of a question.
Thankfully, the notoriously brilliant tianon was nice enough to introduce me to Jon Johnson, who did something super similar: he adapted a technique to seek inside a compressed gzip file, which lets his service oci.dag.dev seek through Docker container images super fast, based on some prior work such as soci-snapshotter, gztool, and zran.c. He also pulled this party trick off for apk-based distros over at apk.dag.dev, which seems apropos. Jon was nice enough to publish a lot of his work on this specifically in a central place under the name targz on his GitHub, which has been a ton of fun to read through.
The gist is that, by dumping the decompressor's state (window of previous bytes, in-memory data derived from the last N-1 bytes) at specific checkpoints, along with the compressed data stream offset in bytes and decompressed offset in bytes, one can seek to that checkpoint in the compressed stream and pick up where you left off, creating a similar block mechanism against the wishes of gzip. It means you'd need to do an O(n) run over the file, but every request after that will be sped up according to the number of checkpoints you've taken.
Given the complexity of xz and lzma2, I don't think this is possible for me at the moment, especially given most of the files I'll be requesting will not be loaded from again, and especially when I can just cache the debug header by Build-Id. I want to implement this (because I'm generally curious and Jon has a way of getting someone excited about compression schemes, which is not a sentence I thought I'd ever say out loud), but for now I'm going to move on without this optimization. Such a shame, since it kills a lot of the work that went into seeking around the .deb file in the first place, given the debian-binary and control.tar.gz members are so small.
Time to take debugfs out for a spin! First, we need to mount the filesystem. It even works on an entirely unmodified, stock Debian box on my LAN, which is huge. Let's take it for a spin:
$ mount \
-t 9p \
-o trans=tcp,version=9p2000.u,aname=unstable-debug \
192.168.0.2 \
/usr/lib/debug/.build-id/
And, let's prove to ourselves that this actually mounted before we go trying to use it:
$ mount | grep build-id
192.168.0.2 on /usr/lib/debug/.build-id type 9p (rw,relatime,aname=unstable-debug,access=user,trans=tcp,version=9p2000.u,port=564)
Slick. We've got an open connection to the server, where our host will keep a connection alive as root, attached to the filesystem provided in aname. Let's take a look at it.
$ ls /usr/lib/debug/.build-id/
00 0d 1a 27 34 41 4e 5b 68 75 82 8E 9b a8 b5 c2 CE db e7 f3
01 0e 1b 28 35 42 4f 5c 69 76 83 8f 9c a9 b6 c3 cf dc E7 f4
02 0f 1c 29 36 43 50 5d 6a 77 84 90 9d aa b7 c4 d0 dd e8 f5
03 10 1d 2a 37 44 51 5e 6b 78 85 91 9e ab b8 c5 d1 de e9 f6
04 11 1e 2b 38 45 52 5f 6c 79 86 92 9f ac b9 c6 d2 df ea f7
05 12 1f 2c 39 46 53 60 6d 7a 87 93 a0 ad ba c7 d3 e0 eb f8
06 13 20 2d 3a 47 54 61 6e 7b 88 94 a1 ae bb c8 d4 e1 ec f9
07 14 21 2e 3b 48 55 62 6f 7c 89 95 a2 af bc c9 d5 e2 ed fa
08 15 22 2f 3c 49 56 63 70 7d 8a 96 a3 b0 bd ca d6 e3 ee fb
09 16 23 30 3d 4a 57 64 71 7e 8b 97 a4 b1 be cb d7 e4 ef fc
0a 17 24 31 3e 4b 58 65 72 7f 8c 98 a5 b2 bf cc d8 E4 f0 fd
0b 18 25 32 3f 4c 59 66 73 80 8d 99 a6 b3 c0 cd d9 e5 f1 fe
0c 19 26 33 40 4d 5a 67 74 81 8e 9a a7 b4 c1 ce da e6 f2 ff
Outstanding. Let's try using gdb to debug a binary that was provided by the Debian archive, and see if it'll load the ELF by build-id from the right .deb in the unstable-debug suite:
$ gdb -q /usr/sbin/netlabelctl
Reading symbols from /usr/sbin/netlabelctl...
Reading symbols from /usr/lib/debug/.build-id/e5/9f81f6573dadd5d95a6e4474d9388ab2777e2a.debug...
(gdb)
Yes! Yes it will!
$ file /usr/lib/debug/.build-id/e5/9f81f6573dadd5d95a6e4474d9388ab2777e2a.debug
/usr/lib/debug/.build-id/e5/9f81f6573dadd5d95a6e4474d9388ab2777e2a.debug: ELF 64-bit LSB shared object, x86-64, version 1 (SYSV), dynamically linked, interpreter *empty*, BuildID[sha1]=e59f81f6573dadd5d95a6e4474d9388ab2777e2a, for GNU/Linux 3.2.0, with debug_info, not stripped
9p is mainline, which is great, but it's not robust. Network issues or server restarts will wedge the mountpoint (Linux can't reconnect when the tcp connection breaks), and things that work fine on local filesystems get translated in a way that causes a lot of network chatter; for instance, just due to the way the syscalls are translated, doing an ls will result in a stat call for each file in the directory, even though linux had just got a stat entry for every file while it was resolving directory names. On top of that, Linux will serialize all I/O with the server, so there are no concurrent requests for file information, writes, or reads pending at the same time to the server; and read and write throughput will degrade as latency increases due to increasing round-trip time, even though there are offsets included in the read and write calls. It works well enough, but is frustrating to run up against, since there's not a lot you can do server-side to help with this beyond implementing the 9P2000.L variant (which, maybe, is worth it).
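One quick way to watch that chatter for yourself (a sketch of my own, not something debugfs provides) is to count syscalls during a directory listing on the mounted path:
$ strace -c -f ls /usr/lib/debug/.build-id/ > /dev/null
# Expect roughly one stat-family call per directory entry on top of the readdir traffic.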
The real size is only known once we have read through the tar file and found the correct member, so for most files we don't know the real size to report when getting a stat. We can't parse the tarfiles for every stat call, since that'd make ls even slower (bummer). The only hiccup is that when I report a filesize of zero, gdb throws a bit of a fit; let's try with a size of 0 to start:
$ ls -lah /usr/lib/debug/.build-id/e5/9f81f6573dadd5d95a6e4474d9388ab2777e2a.debug
-r--r--r-- 1 root root 0 Dec 31 1969 /usr/lib/debug/.build-id/e5/9f81f6573dadd5d95a6e4474d9388ab2777e2a.debug
$ gdb -q /usr/sbin/netlabelctl
Reading symbols from /usr/sbin/netlabelctl...
Reading symbols from /usr/lib/debug/.build-id/e5/9f81f6573dadd5d95a6e4474d9388ab2777e2a.debug...
warning: Discarding section .note.gnu.build-id which has a section size (24) larger than the file size [in module /usr/lib/debug/.build-id/e5/9f81f6573dadd5d95a6e4474d9388ab2777e2a.debug]
[...]
This obviously won't work, since gdb will throw away all our hard work because of stat's output, and neither will loading the real size of the underlying file. That only leaves us with hardcoding a file size and hoping nothing else breaks significantly as a result. Let's try it again:
$ ls -lah /usr/lib/debug/.build-id/e5/9f81f6573dadd5d95a6e4474d9388ab2777e2a.debug
-r--r--r-- 1 root root 954M Dec 31 1969 /usr/lib/debug/.build-id/e5/9f81f6573dadd5d95a6e4474d9388ab2777e2a.debug
$ gdb -q /usr/sbin/netlabelctl
Reading symbols from /usr/sbin/netlabelctl...
Reading symbols from /usr/lib/debug/.build-id/e5/9f81f6573dadd5d95a6e4474d9388ab2777e2a.debug...
(gdb)
Much better. I mean, terrible but better. Better for now, anyway.
I do plan to keep using 9p and arigato-based filesystems around my LAN, but I don't think I'll be moving to use debugfs until I can figure out how to ensure the connection is more resilient to changing networks and server restarts, and until the i/o performance issues are fixed. I think it was a useful exercise and is a pretty great hack, but I don't think this'll be shipping anywhere anytime soon.
Along with me publishing this post, I've pushed up all my repos, so you should be able to play along at home! There's a lot more work to be done on arigato, but it does handshake and successfully export a working 9P2000.u filesystem. Check it out on my github at arigato, debugfs and also on crates.io and docs.rs. At least I can say I was here and I got it working after all these years.
dh-sequence-movetousr or has an open bug report. The package set relevant to debootstrap (except for the set that has to be uploaded concurrently) has been moved to /usr and is awaiting migration. The move of coreutils happened to affect piuparts, which hard-codes the location of /bin/sync and received multiple updates as a result.
-Werror=implicit-function-declaration.
linux-libc-dev and the recent gcc-for-host changes, and also fixed a 64bit-time_t FTBFS in libtextwrap.
I could have swapped in a different resolv.conf when the primary network went down, but then I would have to get into moving files around based on networking status, and that felt a bit clunky.
So I decided to finally set up a proper local recursive DNS server, which is something I've kinda meant to do for a while but never had sufficient reason to look into. Last time I did this I did it with BIND 9, but there are more options these days, and I decided to go with unbound, which is primarily focused on recursive DNS.
One extra wrinkle, pointed out by Lars, is that having dynamic name information from DHCP hosts is exceptionally convenient. I've kept dnsmasq as the local DHCP server, so I wanted to be able to forward local queries there.
I'm doing all of this on my RB5009, running Debian. Installing unbound was a simple matter of apt install unbound. I needed two pieces of configuration over the default: one to enable recursive serving for the house networks, and one to enable forwarding of queries for the local domain to dnsmasq. I had originally specified the wildcard address for listening, but this caused problems because my router has many interfaces and would sometimes respond from a different address than the one the request had come in on.
server:
interface: 192.0.2.1
interface: 2001:db8:f00d::1
access-control: 192.0.2.0/24 allow
access-control: 2001:db8:f00d::/56 allow
server:
domain-insecure: "example.org"
do-not-query-localhost: no
forward-zone:
name: "example.org"
forward-addr: 127.0.0.1@5353
And the corresponding dnsmasq configuration, listening only on localhost:
interface=lo
port=5353
dhcp-option=option6:dns-server,[2001:db8:f00d::1]
dhcp-option=option:dns-server,192.0.2.1
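A quick sanity check once both daemons are running, using the addresses above (the local host name here is a made-up example): the first query should be resolved recursively by unbound itself, and the second forwarded to dnsmasq for the local domain.
dig @192.0.2.1 debian.org +short
dig @192.0.2.1 somehost.example.org +short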
So far, I have not found any reproducibility issues; everything I tested I was able to get to build bit-for-bit identical with what is in the Debian archive.

That is to say, reproducibility testing permitted Vagrant and Debian to claim with some confidence that builds performed when this vulnerable version of XZ was installed were not interfered with.
Functional package managers (FPMs) and reproducible builds (R-B) are technologies and methodologies that are conceptually very different from the traditional software deployment model, and that have promising properties for software supply chain security. This thesis aims to evaluate the impact of FPMs and R-B on the security of the software supply chain and propose improvements to the FPM model to further improve trust in the open source supply chain. (PDF)

Julien's paper poses a number of research questions on how the model of distributions such as GNU Guix and NixOS can be leveraged to further improve the safety of the software supply chain, etc.
normal to a new level of wishlist. In addition, 28 reviews of Debian packages were added, 38 were updated and 23 were removed this month, adding to the ever-growing knowledge about identified issues. As part of this effort, a number of issue types were updated, including Chris Lamb adding a new ocaml_include_directories toolchain issue [ ], and James Addison adding a new filesystem_order_in_java_jar_manifest_mf_include_resource issue [ ] and updating the random_uuid_in_notebooks_generated_by_nbsphinx issue to reference a relevant discussion thread [ ].

In addition, Roland Clobus posted his 24th status update on reproducible Debian ISO images. Roland highlights that the images for Debian unstable often cannot be generated due to changes in that distribution related to the 64-bit time_t transition.
Lastly, Bernhard M. Wiedemann posted another monthly update for his reproducibility work in openSUSE.
buildinfo file was present. Arnout Engelen responded with some details.
diff-zip-meta.py tool to expose extra timestamps embedded in .zip and .apk metadata.
CITATION.cff file. Pol also added a substantial new section to the buy-in page documenting the role of Software Bills of Materials (SBOMs) and ephemeral development environments. [ ][ ]
amd64 virtual machines. [ ][ ][ ]
The Python set data structure is also affected by the PYTHONHASHSEED functionality. [ ]
Chris Lamb uploaded versions 259, 260 and 261 to Debian and made the following additional changes:
- zipdetails tool from the Perl distribution. Thanks to Fay Stegerman and Larry Doolittle et al. for the pointer and thread about this tool. [ ]
- File.recognizes so we actually perform the filename check for GNU R data files. [ ]
- .rdb file without an equivalent .rdx file. (#1066991)
- .pyc file with an empty one. [ ]
- .epub tests after supporting the new zipdetails tool. [ ]
- test_zip.py. [ ]
- zipfile module changed to detect potentially insecure overlapping entries within .zip files. (#362)
Chris Lamb also updated the trydiffoscope command line client, dropping a build-dependency on the deprecated python3-distutils package to fix Debian bug #1065988 [ ], taking a moment to also refresh the packaging to the latest Debian standards [ ]. Finally, Vagrant Cascadian submitted an update for diffoscope version 260 in GNU Guix. [ ]
- helm (SSL-related build failure)
- java-21-openjdk (parallelism)
- libressl (SSL-related build failure)
- nfdump (date issue)
- python-django-q (avoid stuck build)
- python-smart-open (fails to build on single-CPU machines)
- python-stdnum (fails to build in 2039)
- python-yarl (regression)
- qemu (build failure)
- rabbitmq-java-client (with Fridrich Strba; Maven timestamp issue)
- rmw (build fails in 2038)
- warewulf (with Egbert Eich; cpio modification time and inode issue)
- wxWidgets (fails to build in 2038)
- python-quantities
- gnome-maps
- tox
- q2cli
- mpl-sphinx-theme
- woof-doom
- bochs
- storm-lang
- librsvg
- gretl
- postfix
- node-function-bind
- python-pysaml2
- golang-github-stvp-tempredis
- matplotlib
- pathos
- rdflib
- xonsh
- maven-bundle-plugin (This patch was then uploaded by Mattia Rizzollo.)
- geany (toolchain-related issue for glfw)
%check section, thus failing when built with the --no-checks option. Only half of all openSUSE packages were tested so far, but a large number of bugs were filed, including ones against caddy, exiv2, gnome-disk-utility, grisbi, gsl, itinerary, kosmindoormap, libQuotient, med-tools, plasma6-disks, pspp, python-pypuppetdb, python-urlextract, rsync, vagrant-libvirt and xsimd.
Similarly, Jean-Pierre De Jesus DIAZ employed reproducible builds techniques in order to test a proposed refactor of the ath9k-htc-firmware package. As the change produced bit-for-bit identical binaries to the previously shipped pre-built binaries:
I don't have the hardware to test this firmware, but the build produces the same hashes for the firmware so it's safe to say that the firmware should keep working.
- armhf again. [ ][ ]
- i386 architecture queue. [ ]
- stats_buildinfo.png graph once per day. [ ][ ]
- systemctl with new systemd-based services. [ ]
- armhf and i386 continuous integration tests in order to get some stability back. [ ]
- deb.debian.org CDN everywhere. [ ]
- zst to the list of packages which are false-positive diskspace issues. [ ]
- Bot in the userAgent for Git. (Re: #929013). [ ]
- tmpfs size on our OUSL nodes. [ ]
- reproducible_build service. [ ][ ]
- OOMPolicy=continue and OOMScoreAdjust=-1000 for both the Jenkins and the reproducible_build service. [ ]
- systemd slice to group all relevant services. [ ][ ]
- shellcheck tool. [ ]
- systemd-run to handle diffoscope's exit codes specially. [ ]
- pgrep tool over grepping the output of ps. [ ]
- i386 and armhf architecture builders. [ ][ ]
- armhf architecture due to the time_t transition. [ ]
- i386 & armhf workers. [ ][ ][ ]
- pbuilder updates in the unstable distribution, but only on the armhf architecture. [ ]
- systemd service operates. [ ][ ]
- powercycle_x86_nodes.py script to use the new IONOS API and its new Python bindings. [ ]
- stunnel tool anymore, it shouldn't be needed by anything anymore. [ ]
- arm64 architecture host keys. [ ]
- (-) in a variable in order to allow for tags in openQA. [ ]
#reproducible-builds on irc.oftc.net
rb-general@lists.reproducible-builds.org
git@github.com with publickey authentication.
They were using the standard way that everyone manages SSH keys: the ~/.ssh/authorized_keys
file, and that became a problem as the number of keys started to grow.
The way that SSH uses this file is that, when a user connects and asks for publickey authentication, SSH opens the ~/.ssh/authorized_keys
file and scans all of the keys listed in it, looking for a key which matches the key that the user presented.
This linear search is normally not a huge problem, because nobody in their right mind puts more than a few keys in their ~/.ssh/authorized_keys
, right?
Of course, as a popular, rapidly-growing service, GitHub was gaining users at a fair clip, to the point that the one big file that stored all the SSH keys was starting to visibly impact SSH login times.
This problem was also not going to get any better by itself.
Something Had To Be Done.
EY management was keen on making sure GitHub ran well, and so despite it not really being a hosting problem, they were willing to help fix this problem.
For some reason, the late, great Ezra Zygmuntowicz pointed GitHub in my direction, and let me take the time to really get into the problem with the GitHub team.
After examining a variety of different possible solutions, we came to the conclusion that the least-worst option was to patch OpenSSH to lookup keys in a MySQL database, indexed on the key fingerprint.
We didn't take this decision on a whim; it wasn't a case of "yeah, sure, let's just hack around with OpenSSH, what could possibly go wrong?".
We knew it was potentially catastrophic if things went sideways, so you can imagine how much worse the other options available were.
Ensuring that this wouldn t compromise security was a lot of the effort that went into the change.
In the end, though, we rolled it out in early April, and lo! SSH logins were fast, and we were pretty sure we wouldn t have to worry about this problem for a long time to come.
Normally, you'd think patching OpenSSH to make mass SSH logins super fast would be a good story on its own.
But no, this is just the opening scene.
Courtesy of my CRANberries, there is a diffstat report relative to the previous release. More detailed information is on the RcppArmadillo page. Questions, comments etc. should go to the rcpp-devel mailing list off the Rcpp R-Forge page. If you like this or other open-source work I do, you can sponsor me at GitHub.
Changes in RcppArmadillo version 0.12.8.2.0 (2024-04-02)
- Upgraded to Armadillo release 12.8.2 (Cortisol Injector)
- Workaround for FFTW3 header clash
- Workaround in testing framework for issue under macOS
- Minor cleanups to reduce code bloat
- Improved documentation
This post by Dirk Eddelbuettel originated on his Thinking inside the box blog. Please report excessive re-aggregation in third-party for-profit settings.
I usually talk about me whenever I am talking about answering the question who am I, I usually say like I am a librarian turned free software enthusiast and a Debian Developer. So I had no technical background and I learned, I was introduced to free software through my husband and then I learned Debian packaging, and eventually I became a Debian Developer. So I always give my example to people who say I am not technically inclined, I don't have technical background so I can't contribute to free software. So yeah, that's what I refer to myself.For the next question, could you tell me what do you do in Debian, and could you mention your story up until here today? [Sruthi]:
Okay, so let me start from my initial days in Debian. I started contributing to Debian, my first contribution was a Tibetan font. We went to a Tibetan place and they were saying they didn't have a font in Linux. So that's how I started contributing. Then I moved on to Ruby packages, then I have some JavaScript and Go packages, all dependencies of GitLab. So I was involved with maintaining GitLab for some time, now I'm not very active there. But yeah, so GitLab was the main package I was contributing to since I contributed since 2016 to maybe like 2020 or something. Later I have come [over to] packaging. Now I am part of some of the teams, delegated teams, like community team and outreach team, as well as the Debconf committee. And the biggest, I think, my activity in Debian, I would say is organizing Debconf 2023. So it was a great experience and yeah, so that's my story in Debian.So what are three key terms about you and your candidacy? [Sruthi]:
Okay, let me first think about it. For candidacy, I can start with diversity is one point I started expressing from the first time I contested for DPL. But to be honest, that's the main point I want to bring.[Yashraj]:
So for diversity, if you could break down your thoughts on diversity and make them, [about] your three points including diversity.[Sruthi]:
So in addition to, eventually when starting it was just diversity. Now I have like a bit more ideas, like community, like I want to be a leader for the Debian community. More than, I don't know, maybe people may not agree, but I would say I want to be a leader of Debian community rather than a Debian operating system. I connect to community more and third point I would say.The term of a DPL lasts for an year. So what do you think during, what would you try to do during that, that you can't do from your position now? [Sruthi]:
Okay. So I, like, I am very happy with the structure of Debian and how things work in Debian. Like you can do almost a lot of things, like almost all things without being a DPL. Whatever change you want to bring about or whatever you want to do, you can do without being a DPL. Anyone, like every DD has the same rights. Only things I feel [the] DPL has hold on are mainly the budget or the funding part, which like, that's where they do the decision making part. And then comes like, and one advantage of DPL driving some idea is that somehow people tend to listen to that with more, like, tend to give more attention to what DPL is saying rather than a normal DD. So I wanted to, like, I have answered some of the questions on how to, how I plan to do the financial budgeting part, how I want to handle, like, and the other thing is using the extra attention that I get as a DPL, I would like to obviously start with the diversity aspect in Debian. And yeah, like, I, what I want to do is not, like, be a leader and say, like, take Debian to one direction where I want to go, but I would rather take suggestions and inputs from the whole community and go about with that. So yes, that's what I would say.And taking a less serious question now, what is your preferred text editor? [Sruthi]:
Vim.[Yashraj]:
Vim, wholeheartedly team Vim?[Sruthi]:
Yes.[Yashraj]:
Great. Well, this was made in Vim, all the text for this.[Sruthi]:
So, like, since you mentioned extra data, I'll give my example, like, it's just a fun note, when I started contributing to Debian, as I mentioned, I didn't have any knowledge about free software, like Debian, and I was not used to even using Linux. So, and I didn't have experience with these text editors. So, when I started contributing, I used to do the editing part using gedit. So, that's how I started. Eventually, I moved to Nano, and once I reached Vim, I didn't move on.Team Vim. Next question. What, what do you think is the importance of the Debian project in the world today? And where would you like to see it in 10 years, like 10 years into the future? [Sruthi]:
Okay. So, Debian, as we all know, is referred to as the universal operating system without, like, it is said for a reason. We have hundreds and hundreds of operating systems, like Linux, distributions based on Debian. So, I believe Debian, like even now, Debian has good influence on the, at least on the Linux or Linux ecosystem. So, what we implement in Debian has, like, is going to affect quite a lot of, like, a very good percentage of people using Linux. So, yes. So, I think Debian is one of the leading Linux distributions. And I think in 10 years, we should be able to reach a position, like, where we are not, like, even now, like, even these many years after having Linux, we face a lot of problems in newer and newer hardware coming up and installing on them is a big problem. Like, firmwares and all those things are getting more and more complicated. Like, it should be getting simpler, but it's getting more and more complicated. So, I, one thing I would imagine, like, I don't know if we will ever reach there, but I would imagine that eventually with the Debian, we should be able to have some, at least a few of the hardware developers or hardware producers have Debian pre-installed and those kind of things. Like, not, like, become, I'm not saying it's all, it's also available right now. What I'm saying is that it becomes prominent enough to be opted as, like, default distro.What part of Debian has made you And what part of the project has kept you going all through these years? [Sruthi]:
Okay. So, I started to contribute in 2016, and I was part of the team doing GitLab packaging, and we did have a lot of training workshops and those kind of things within India. And I was, like, I had interacted with some of the Indian DDs, but I never got, like, even through chat or mail. I didn't have a lot of interaction with the rest of the world, DDs. And the 2019 Debconf changed my whole perspective about Debian. Before that, I wasn't, like, even, I was interested in free software. I was doing the technical stuff and all. But after DebConf, my whole idea has been, like, my focus changed to the community. Debian community is a very welcoming, very interesting community to be with. And so, I believe that, like, 2019 DebConf was a for me. And that kept, from 2019, my focus has been to how to support, like, how, I moved to the community part of Debian from there. Then in 2020 I became part of the community team, and, like, I started being part of other teams. So, these, I would say, the Debian community is the one, like, aspect of Debian that keeps me whole, keeps me held on to the Debian ecosystem as a whole.Continuing to speak about Debian, what do you think, what is the first thing that comes to your mind when you think of Debian, like, the word, the community, what's the first thing? [Sruthi]:
I think I may sound like a broken record or something.[Yashraj]:
No, no.[Sruthi]:
Again, I would say the Debian community, like, it's the people who makes Debian, that makes Debian special. Like, apart from that, if I say, I would say I'm very, like, one part of Debian that makes me very happy is the, how the governing system of Debian works, the Debian constitution and all those things, like, it's a very unique thing for Debian. And, and it's like, when people say you can't work without a proper, like, establishment or even somebody deciding everything for you, it's difficult. When people say, like, we have been, Debian has been proving it for quite a long time now, that it's possible. So, so that's one thing I believe, like, that's one unique point. And I am very proud about that.What areas do you think Debian is failing in, how can it (that standing) be improved? [Sruthi]:
So, I think where Debian is failing now is getting new people into Debian. Like, I don't remember, like, exactly the answer. But I remember hearing someone mention, like, the average age of a Debian Developer is, like, above 40 or 45 or something, like, exact age, I don't remember. But it's like, Debian is getting old. Like, the people in Debian are getting old and we are not getting enough of new people into Debian. And that's very important to have people, like, new people coming up. Otherwise, eventually, like, after a few years, nobody, like, we won't have enough people to take the project forward. So, yeah, I believe that is where we need to work on. We are doing some efforts, like, being part of GSOC or outreachy and having maybe other events, like, local events. Like, we used to have a lot of Debian packaging workshops in India. And those kind of, I think, in Brazil and all, they all have, like, local communities are doing. But we are not very successful in retaining the people who maybe come and try out things. But we are not very good at retaining the people, like, retaining people who come. So, we need to work on those things. Right now, I don't have a solid answer for that. But one thing, like, I was thinking about is, like, having a Debian specific outreach project, wherein the focus will be about the Debian, like, starting will be more on, like, usually what happens in GSOC and outreach is that people come, have the, do the contributions, and they go back. Like, they don't have that connection with the Debian, like, Debian community or Debian project. So, what I envision with these, the Debian outreach, the Debian specific outreach is that we have some part of the internship, like, even before starting the internship, we have some sessions and, like, with the people in Debian having, like, getting them introduced to the Debian philosophy and Debian community and Debian, how Debian works. And those things, we focus on that. And then we move on to the technical internship parts. So, I believe this could do some good in having, like, when you have people you can connect to, you tend to stay back in a project mode. When you feel something more than, like, right now, we have so many technical stuff to do, like, the choice for a college student is endless. So, if they want, if they stay back for something, like, maybe for Debian, I would say, we need to have them connected to the Debian project before we go into technical parts. Like, technical parts, like, there are other things as well, where they can go and do the technical part, but, like, they can come here, like, yeah. So, that's what I was saying. Focused outreach projects is one thing. That's just one. That's not enough. We need more of, like, more ideas to have more new people come up. And I'm very happy with, like, the DebConf thing. We tend to get more and more people from the places where we have a DebConf. Brazil is an example. After the Debconf, they have quite a good improvement on Debian contributors. And I think in India also, it did give a good result. Like, we have more people contributing and staying back and those things. So, yeah. So, these were the things I would say, like, we can do to improve.For the final question, what field in free software do you, what field in free software generally do you think requires the most work to be put into it? What do you think is Debian's part in that field? [Sruthi]:
Okay. Like, right now, what comes to my mind is the free software licenses parts. Like, we have a lot of free software licenses, and there are non-free software licenses. But currently, I feel free software is having a big problem in enforcing these licenses. Like, there are, there may be big corporations or like some people who take up the whole, the code and may not follow the whole, for example, the GPL licenses. Like, we don't know how much of those, how much of the free softwares are used in the bigger things. Yeah, I agree. There are a lot of corporations who are afraid to touch free software. But there would be good amount of free software, free work that converts into property, things violating the free software licenses and those things. And we do not have the kind of like, we have SFLC, SFC, etc. But still, we do not have the ability to go behind and trace and implement the licenses. So, enforce those licenses and bring people who are violating the licenses forward and those kind of things is challenging because one thing is it takes time, like, and most importantly, money is required for the legal stuff. And not always people who like people who make small software, or maybe big, but they may not have the kind of time and money to have these things enforced. So, that's a big challenge free software is facing, especially in our current scenario. I feel we are having those, like, we need to find ways how we can get it sorted. I don't have an answer right now what to do. But this is a challenge I felt like and Debian's part in that. Yeah, as I said, I don't have a solution for that. But the Debian, so DFSG and Debian sticking on to the free software licenses is a good support, I think.So, that was the final question, Do you have anything else you want to mention for anyone watching this? [Sruthi]:
Not really, like, I am happy, like, I think I was able to answer the questions. And yeah, I would say who is watching. I won't say like, I'm the best DPL candidate, you can't have a better one or something. I stand for a reason. And if you believe in that, or the Debian community and Debian diversity, and those kinds of things, if you believe it, I hope you would be interested, like, you would want to vote for me. That's it. Like, I'm not, I'll make it very clear. I'm not doing a technical leadership part here. So, those, I can't convince people who want technical leadership to vote for me. But I would say people who connect with me, I hope they vote for me.
"The problem here is with Open Source Software."
I want to say not only is that view so myopic that it pushes towards the incorrect, but also it blinds us to more serious problems. Now, I don't pretend that there are no problems in the FLOSS community. There have been various pieces written about what this issue says about the FLOSS community (usually without actionable solutions). I'm not here to say those pieces are wrong. Just that there's a bigger picture. So with this xz issue, it may well be a state actor (aka "spy") that added this malicious code to xz. We also know that proprietary software and systems can be vulnerable. For instance, a Twitter whistleblower revealed that Twitter employed Indian and Chinese spies, some knowingly. A recent report pointed to security lapses at Microsoft, including preventable lapses in security. According to the Wikipedia article on the SolarWinds attack, it was facilitated by various kinds of carelessness, including passwords being posted to GitHub and weak default passwords. They directly distributed malware-infested updates, encouraged customers to disable anti-malware tools when installing SolarWinds products, and so forth. It would be naive indeed to assume that there aren't black hat actors among the legions of programmers employed by companies that outsource work to low-cost countries, some of which have challenges with bribery. So, given all this, we can't really say the problem is Open Source. Maybe it's more broad:
"The problem here is with software."
Maybe that inches us closer, but is it really accurate? We have all heard of Boeing's recent issues, which seem to have some element of root causes in corporate carelessness, cost-cutting, and outsourcing. That sounds rather similar to the SolarWinds issue, doesn't it?
"Well then, the problem is capitalism."
Maybe it has a role to play, but isn't it a little too easy to just say "capitalism" and throw up our hands helplessly, just as some do with FLOSS as at the start of this article? After all, capitalism also brought us plenty of products of very high quality over the years. We can point to successful, non-careless products, and I own some of them (for instance, my Framework laptop). We clearly haven't reached the root cause yet. And besides, what would you replace it with? All the major alternatives that have been tried have even stronger downsides. Maybe you replace it with "better regulated capitalism", but that's still capitalism.
"Then the problem must be with consumers."
As this argument would go, it's consumers' buying patterns that drive problems. Buyers, individual and corporate, seek flashy features and low cost, prizing those over quality and security. No doubt this is true in a lot of cases. Maybe greed or status-conscious societies foster it: Temu promises people to "shop like a billionaire", and unloads on them cheap junk, which "all but guarantees that shipments from Temu containing products made with forced labor are entering the United States on a regular basis". But consumers are also people, and some fraction of them are quite capable of writing fantastic software, and in fact, do so. So what we need is some way to seize control. Some way to do what is right, despite the pressures of consumers or corporations. Ah yes, dear reader, you have been slogging through all these paragraphs and now realize I have been leading you to this:
"Then the solution is Open Source."
Indeed. Faults and all, FLOSS is the most successful movement I know where people are bringing us back to the commons: working and volunteering for the common good, unleashing a thousand creative variants on a theme, iterating in every direction imaginable. We have FLOSS being vital parts of everything from $30 Raspberry Pis to space missions. It is bringing education and communication to impoverished parts of the world. It lets everyone write and release software. And, unlike the SolarWinds and Twitter issues, it exposes both clever solutions and security flaws to the world. If an authentication process in Windows got slower, we would all shrug and mutter "Microsoft" under our breath. Because, really, what else can we do? We have no agency with Windows. If an authentication process in Linux gets slower, anybody that's interested, anybody at all, can dive in and ask why and trace it down to root causes. Some look at this and say FLOSS is responsible for this mess. I look at it and say, this would be so much worse if it wasn't FLOSS, and experience backs me up on this. FLOSS doesn't prevent security issues itself. What it does do is give capabilities to us all. The ability to investigate. The ability to fix. Yes, even the ability to break, and its cousin, the power to learn. And, most rewarding, the ability to contribute.
New netplan status diff subcommand, finding differences between configuration and system state.
As the maintainer and lead developer for Netplan, I'm proud to announce the general availability of Netplan v1.0 after more than 7 years of development efforts. Over the years, we've so far had about 80 individual contributors from around the globe. This includes many contributions from our Netplan core-team at Canonical, but also from other big corporations such as Microsoft or Deutsche Telekom. Those contributions, along with the many we receive from our community of individual contributors, solidify Netplan as a healthy and trusted open source project. In an effort to make Netplan even more dependable, we started shipping upstream patch releases, such as 0.106.1 and 0.107.1, which make it easier to integrate fixes into our users' custom workflows. With the release of version 1.0 we primarily focused on stability. However, being a major version upgrade, it allowed us to drop some long-standing legacy code from the libnetplan1 library. Removing this technical debt increases the maintainability of Netplan's codebase going forward. The upcoming Ubuntu 24.04 LTS and Debian 13 releases will ship Netplan v1.0 to millions of users worldwide.
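If you want to try the new diff subcommand, the minimal sketch below shells out to it from Python; it assumes the feature is exposed as netplan status --diff (check netplan status --help on your system), which may differ from the exact interface shipped in your version.

# Minimal sketch: run Netplan's diff mode and print whatever it reports.
# The "--diff" flag is an assumption here; verify with "netplan status --help".
import subprocess

result = subprocess.run(
    ["netplan", "status", "--diff"],
    capture_output=True,
    text=True,
    check=False,  # don't raise; just show stdout or stderr below
)
print(result.stdout or result.stderr)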
For example, a compromised xz could recognise when it is decompressing gcc.tar.xz and quietly tamper with the source code it unpacks.
More likely, they wouldn't bother with an actual trusting trust attack on
gcc, which would be a lot of work to get right. One problem with the ssh
backdoor is that well, not all servers on the internet run ssh. (Or
systemd.) So webservers seem a likely target of this kind of second stage
attack. Apache's docs include png files, nginx does not, but there's always
scope to add improved documentation to a project.
When would such a vulnerability have been introduced? In February, "Jia
Tan" wrote a new decoder for xz.
This added 1000+ lines of new C code across several commits. So much code
and in just the right place to insert something like this. And why take on
such a significant project just two months before inserting the ssh
backdoor? "Jia Tan" was already fully accepted as maintainer, and doing
lots of other work, it doesn't seem to me that they needed to start this
rewrite as part of their cover.
They were working closely with xz's author Lasse Collin in this, by
indications exchanging patches offlist as they developed it. So Lasse
Collin's commits in this time period are also worth scrutiny, because
they could have been influenced by "Jia Tan". One that
caught my eye comes immediately afterwards:
"prepares the code for alternative C versions and inline assembly"
Multiple versions and assembly mean even more places to hide such a
security hole.
I stress that I have not found such a security hole, I'm only considering
what the worst case possibilities are. I think we need to fully consider
them in order to decide how to fully wrap up this mess.
Whether such stealthy security holes have been introduced into xz by "Jia
Tan" or not, there are definitely indications that the ssh backdoor was not
the end of what they had planned.
For one thing, the "test file" based system they introduced
was extensible.
They could have been planning to add more test files later, that backdoored
xz in further ways.
And then there's the matter of the disabling of the Landlock sandbox. This
was not necessary for the ssh backdoor, because the sandbox is only used by
the xz
command, not by liblzma. So why did they potentially tip their
hand by adding that rogue "." that disables the sandbox?
A sandbox would not prevent the kind of attack I discuss above, where xz is
just modifying code that it decompresses. Disabling the sandbox suggests
that they were going to make xz run arbitrary code, that perhaps wrote to
files it shouldn't be touching, to install a backdoor in the system.
Both deb and rpm use xz compression, and with the sandbox disabled,
whether they link with liblzma or run the xz
command, a backdoored xz can
write to any file on the system while dpkg or rpm is running and no one is
likely to notice, because that's the kind of thing a package manager does.
My impression is that all of this was well planned and they were in it for
the long haul. They had no reason to stop with backdooring ssh, except for
the risk of additional exposure. But they decided to take that risk, with
the sandbox disabling. So they planned to do more, and every commit
by "Jia Tan", and really every commit that they could have influenced
needs to be distrusted.
This is why I've suggested to Debian that they
revert to an earlier version of xz.
That would be my advice to anyone distributing xz.
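For anyone who simply wants to know what they are running right now, a quick check against the two releases publicly identified as carrying the backdoor (5.6.0 and 5.6.1) can look like the sketch below. It only detects the published payload, not any of the other commits discussed above.

# Warn if the installed xz is one of the releases known to ship the backdoor.
# This catches only the published 5.6.0/5.6.1 payload, nothing else.
import re
import subprocess

KNOWN_BAD = {"5.6.0", "5.6.1"}

output = subprocess.run(["xz", "--version"], capture_output=True, text=True).stdout
match = re.search(r"xz \(XZ Utils\) (\d+\.\d+\.\d+)", output)
if match is None:
    print("could not determine xz version from:", output.strip())
elif match.group(1) in KNOWN_BAD:
    print("xz", match.group(1), "is a known-backdoored release; downgrade or rebuild")
else:
    print("xz", match.group(1), "is not one of the known-backdoored releases")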
I do have an xz-unscathed
fork which I've carefully constructed to avoid all "Jia Tan" involved
commits. It feels good to not need to worry about dpkg
and tar
.
I only plan to maintain this fork minimally, eg security fixes.
Hopefully Lasse Collin will consider these possibilities and address
them in his response to the attack.