ls: the missing options

I'm honored and pleased to be the person who gets to complete ls. This project, begun around when I was born, was slow to turn into anything more than a simple for loop over a dirent. It really took off in the mid and late 80's, when Richard Stallman added numerous features, and the growth has been steady ever since. But, a glance at the man page shows that ls has never quite been complete. It fell to me to finish the job, and I have produced several handy patches to this end:

The only obvious lack now is a -z option, which should make output filenames be NUL-terminated for consumption by other programs. I think this would be easy to write, but I've been extremely busy IRL (moving lots of furniture) and didn't get to it. Any takers to write it?

Due to the nature of these patches, they conflict with each other. Here's a combined patch suitable to be applied and tested.

diff -ur orig/coreutils-8.13/src/ls.c coreutils-8.13/src/ls.c
--- orig/coreutils-8.13/src/ls.c    2011-07-28 06:38:27.000000000 -0400
+++ coreutils-8.13/src/ls.c 2012-04-01 12:41:56.835106346 -0400
@@ -270,6 +270,7 @@
 static int format_group_width (gid_t g);
 static void print_long_format (const struct fileinfo *f);
 static void print_many_per_line (void);
+static void print_jam (void);
 static size_t print_name_with_quoting (const struct fileinfo *f,
                                        bool symlink_target,
                                        struct obstack *stack,
@@ -382,6 +383,7 @@
    many_per_line for just names, many per line, sorted vertically.
    horizontal for just names, many per line, sorted horizontally.
    with_commas for just names, many per line, separated by commas.
+   jam to fit in the most information possible.
 
    -l (and other options that imply -l), -1, -C, -x and -m control
    this parameter.  */
@@ -392,7 +394,8 @@
     one_per_line,      /* -1 */
     many_per_line,     /* -C */
     horizontal,            /* -x */
-    with_commas            /* -m */
+    with_commas,       /* -m */
+    jam            /* -j */
   };
 
 static enum format format;
@@ -630,6 +633,11 @@
 
 static bool immediate_dirs;
 
+/* True means when multiple directories are being displayed, combine
+ * their contents as if all in one directory. -e */
+
+static bool entangle_dirs;
+
 /* True means that directories are grouped before files. */
 
 static bool directories_first;
@@ -705,6 +713,10 @@
 
 static bool format_needs_type;
 
+/* Answer "yes" to all prompts. */
+
+static bool yes;
+
 /* An arbitrary limit on the number of bytes in a printed time stamp.
    This is set to a relatively small value to avoid the need to worry
    about denial-of-service attacks on servers that run "ls" on behalf
@@ -804,6 +816,7 @@
   {"escape", no_argument, NULL, 'b'},
   {"directory", no_argument, NULL, 'd'},
   {"dired", no_argument, NULL, 'D'},
+  {"entangle", no_argument, NULL, 'e'},
   {"full-time", no_argument, NULL, FULL_TIME_OPTION},
   {"group-directories-first", no_argument, NULL,
    GROUP_DIRECTORIES_FIRST_OPTION},
@@ -849,12 +862,12 @@
 static char const *const format_args[] =
 {
   "verbose", "long", "commas", "horizontal", "across",
-  "vertical", "single-column", NULL
+  "vertical", "single-column", "jam", NULL
 };
 static enum format const format_types[] =
 {
   long_format, long_format, with_commas, horizontal, horizontal,
-  many_per_line, one_per_line
+  many_per_line, one_per_line, jam
 };
 ARGMATCH_VERIFY (format_args, format_types);
 
@@ -1448,6 +1461,9 @@
       print_dir_name = true;
     }
 
+  if (entangle_dirs)
+      print_current_files ();
+
   if (print_with_color)
     {
       int j;
@@ -1559,6 +1575,7 @@
   print_block_size = false;
   indicator_style = none;
   print_inode = false;
+  yes = false;
   dereference = DEREF_UNDEFINED;
   recursive = false;
   immediate_dirs = false;
@@ -1644,7 +1661,7 @@
     {
       int oi = -1;
       int c = getopt_long (argc, argv,
-                           "abcdfghiklmnopqrstuvw:xABCDFGHI:LNQRST:UXZ1",
+                           "abcdefghijklmnopqrstuvw:xyABCDFGHI:LNQRST:UXZ1",
                            long_options, &oi);
       if (c == -1)
         break;
@@ -1667,6 +1684,10 @@
           immediate_dirs = true;
           break;
 
+   case 'e':
+          entangle_dirs = true;
+     break;
+
         case 'f':
           /* Same as enabling -a -U and disabling -l -s.  */
           ignore_mode = IGNORE_MINIMAL;
@@ -1697,6 +1718,10 @@
           print_inode = true;
           break;
 
+   case 'j':
+     format = jam;
+     break;
+
         case 'k':
           human_output_opts = 0;
           file_output_block_size = output_block_size = 1024;
@@ -1765,6 +1790,10 @@
           format = horizontal;
           break;
 
+   case 'y':
+     yes = true;
+     break;
+
         case 'A':
           if (ignore_mode == IGNORE_DEFAULT)
             ignore_mode = IGNORE_DOT_AND_DOTDOT;
@@ -2510,7 +2539,7 @@
       DEV_INO_PUSH (dir_stat.st_dev, dir_stat.st_ino);
     }
 
-  if (recursive || print_dir_name)
+  if ((recursive || print_dir_name) && ! entangle_dirs)
     {
       if (!first)
         DIRED_PUTCHAR ('\n');
@@ -2526,7 +2555,8 @@
   /* Read the directory entries, and insert the subfiles into the `cwd_file'
      table.  */
 
-  clear_files ();
+  if (! entangle_dirs)
+     clear_files ();
 
   while (1)
     {
@@ -2615,7 +2645,7 @@
       DIRED_PUTCHAR ('\n');
     }
 
-  if (cwd_n_used)
+  if (cwd_n_used && ! entangle_dirs)
     print_current_files ();
 }
 
@@ -3464,6 +3494,10 @@
       print_with_commas ();
       break;
 
+    case jam:
+      print_jam ();
+      break;
+
     case long_format:
       for (i = 0; i < cwd_n_used; i++)
         {
@@ -4418,6 +4452,24 @@
   putchar ('\n');
 }
 
+static void
+print_jam (void)
+{
+  size_t filesno;
+  size_t pos = 0;
+
+  for (filesno = 0; filesno < cwd_n_used; filesno++)
+    {
+      struct fileinfo const *f = sorted_file[filesno];
+      size_t len = length_of_file_name_and_frills (f);
+
+      print_file_name_and_frills (f, pos);
+      pos += len;
+    }
+  putchar ('\n');
+}
+
+
 /* Assuming cursor is at position FROM, indent up to position TO.
    Use a TAB character instead of two or more spaces whenever possible.  */
 
@@ -4627,11 +4679,13 @@
   -D, --dired                generate output designed for Emacs' dired mode\n\
 "), stdout);
       fputs (_("\
+  -e, --entangle             display multiple directory contents as one\n\
   -f                         do not sort, enable -aU, disable -ls --color\n\
   -F, --classify             append indicator (one of */=>@|) to entries\n\
       --file-type            likewise, except do not append `*'\n\
       --format=WORD          across -x, commas -m, horizontal -x, long -l,\n\
                                single-column -1, verbose -l, vertical -C\n\
+                               jam -j\n\
       --full-time            like -l --time-style=full-iso\n\
 "), stdout);
       fputs (_("\
@@ -4667,6 +4721,8 @@
   -i, --inode                print the index number of each file\n\
   -I, --ignore=PATTERN       do not list implied entries matching shell PATTERN\
 \n\
+  -j                         jam output together, makes the most of limited\n\
+                             space on modern systems (cell phones, twitter)\n\
   -k                         like --block-size=1K\n\
 "), stdout);
       fputs (_("\
@@ -4733,6 +4789,7 @@
   -w, --width=COLS           assume screen width instead of current value\n\
   -x                         list entries by lines instead of by columns\n\
   -X                         sort alphabetically by entry extension\n\
+  -y                         answer all questions with \"yes\"\n\
   -Z, --context              print any SELinux security context of each file\n\
   -1                         list one file per line\n\
 "), stdout);

It remains to be seen if multi-option enabled coreutils will be accepted into Debian in time for the next release. Due to some disagreements with the coreutils maintainer, the matter has been referred to the Technical Committee (Flattr me)

Traditionally new ls contributors stop once enough options have been added that they can spell their name, in the best traditions of yellow snow. Once ls -richard -stallman worked, I'm sure RMS moved on to other more pressing concerns. The current maintainer, David MacKenzie, was clearly not done yet, since only ls -david -mack worked. But he was being slow to add these last few features, and ls was very deficient in the realm of spelling my name (ls -o -hss .. srsly?), so I took matters into my own hands in the best tradition of free software.

Posted
Stand by the grey stone when the thrush knocks

Today, map in hand, I explored the "long valley, narrower than the great dale in the South where the Gates of the river stood, and walled with lower spurs of the Mountain".

"The dangerous search on the western slopes for the secret door"
"It seemed as if darkness flowed out like a vapour from the hole in the mountain-side" "They spoke low and never called or sang, for danger brooded in every rock."
"It is almost dark so that its vastness can only be dimly guessed, but rising from the near side of the rocky floor there is a great glow. The glow of Smaug!"
Posted
moving my email archives and packages to git-annex

I've recently been moving some important data into git-annex, and finding it simplifies things while also increasing my flexibility.

email archives

I've kept my email archives in git for years. This works ok, just choose the right file format (compressed mbox) and number of files (one archive per mailbox per month or so) and git can handle this well enough, as email is not really large.

But, email is not really small either. Keeping my email repository checked out on my netbook consumes 2 gigabytes of its 30 gigabyte SSD, half of which is duplication in .git. Also, I have only kept it at 2 gigabytes through careful selection of what classes of mail I archive. That made sense when archival disk was more expensive, but what makes sense these days is to archive everything.

For a while I've wanted to have a "raw" archive, that includes all email I receive. (Even spam.) This protects against various disasters in mail filtering or reading. Setting that up was my impetus for switching my mail archives to git-annex today.

The new system I've settled on is to first copy all incoming mail into a "raw" maildir folder. Then mailfilter sorts it into the folders I sync (with offlineimap) and read. Each day, the "raw" folder is moved into a mbox archive, and that's added to the git annex. Each month, the mail I've read is moved into a monthly archive, and added to the git annex. A simple script does the work.

I counted the number of copies that existed of my mail when it was stored in git, and found 7 copies spread among just 3 drives. I decided to slim that back, and configured git-annex to require only 5 copies. But those 5 copies will be spread among more drives, including several offline archival drives, so it will be more robust overall.

My netbook will have an incomplete checkout of my mail, omitting the "raw" archive. If I need to peek inside a spam folder for a lost mail, I can quickly pull it down; if I need to free up space I can quickly drop older archives. This is the flexibility that git-annex fans love. :)

By the way, this also makes it easier to permanently delete mail, when you really need to (ie, for contractual reasons). Before, I'd have to do a painful git-filter-branch if I needed to get rid of eg, mail for old jobs. Now I can git annex drop --force.

Pro Tip: If you're doing this kind of migration to git-annex, you can save bandwidth by not re-transferring files to machines that already have a copy. I ran this command on my netbook to inject the archives it had in the old repository into the new repository, verifying checksums as it goes:

cd ~/mail/archive; find -type l -exec git annex reinject ~/mail.old/archive/{} {} \;

Note on mairix compatibility: I use mairix to index and search my mail. But it refuses to follow git-annex's symlinks to the content. So I have to point it at .git/annex/objects/. I also configured annex.backend to SHA256E, which keeps the extensions on my compressed mailbox files, which is necessary for mairix to realize they're compressed.

debian packages

I'd evolved a complex and fragile chain of personal apt repositories to hold Debian packages I've released. I recently got rid of the mess, which looked like this: dput → local mini-dinstall repo → dput → mini-dinstall repo on my server → dput → Debian

The point of all that was that I could "upload" a package locally while offline and batch transfer it later. And I had a local and a public apt repository of just the packages I've uploaded. But these days, packages uploaded to Debian are available nearly immediately, so there's not much reason to do that.

My old system also had a problem: It only kept the most recent single copy of each package. Again, disk is cheap, so I'd rather have archives of everything I have uploaded. Again I switched to git-annex.

My new system is simplicity itself. I release a package by checking it into a "toupload" directory in my git annex repository on my netbook. Items in that directory are dput to Debian and moved to "released". I have various other clones of that repository, which I git annex move packages to periodically to free up SSD space. In the rare cases when I build a package on a server, I check it into the clone on the server, and again rely on git-annex to copy it around.

Now, does anyone know a good way to download a copy of every package you've ever released from archive.debian.org? (Ideally as a list of urls I can feed to git annex addurl.)

conclusion

My email and Debian packages were the last large files I was not storing in git-annex. Even backups of my backups end up checked into git-annex and archived away.

Now that I'm using git-annex in every place I can, my goal with it is to make it as easy as possible for as many of you to use it as possible, too. I have some inotify tricks up my sleeve that seem promising. Kickstarter may be involved. Watch this space!

Posted