From e8e1d420f364afbfface61d3f03889e10e6066c9 Mon Sep 17 00:00:00 2001
From: Eelco Dolstra <edolstra@gmail.com>
Date: Mon, 21 Sep 2020 18:22:45 +0200
Subject: [PATCH] Don't include <regex> in header files

This reduces compilation time by ~15 seconds (CPU time).

Issue #4045.
---
 src/libexpr/eval.cc                   |  1 +
 src/libexpr/eval.hh                   |  8 +++--
 src/libexpr/flake/flakeref.cc         |  1 +
 src/libexpr/flake/lockfile.cc         |  1 +
 src/libexpr/primops.cc                | 18 ++++++++---
 src/libexpr/primops/fetchMercurial.cc |  3 +-
 src/libfetchers/git.cc                |  1 +
 src/libfetchers/github.cc             |  1 +
 src/libfetchers/indirect.cc           |  1 +
 src/libfetchers/mercurial.cc          |  1 +
 src/libstore/names.cc                 | 21 +++++++++++--
 src/libstore/names.hh                 |  7 +++--
 src/libutil/url-parts.hh              | 44 +++++++++++++++++++++++++++
 src/libutil/url.cc                    |  1 +
 src/libutil/url.hh                    | 38 -----------------------
 src/nix-env/nix-env.cc                |  2 +-
 16 files changed, 96 insertions(+), 53 deletions(-)
 create mode 100644 src/libutil/url-parts.hh

diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc
index 139067f20..883fc27a7 100644
--- a/src/libexpr/eval.cc
+++ b/src/libexpr/eval.cc
@@ -356,6 +356,7 @@ EvalState::EvalState(const Strings & _searchPath, ref<Store> store)
     , sEpsilon(symbols.create(""))
     , repair(NoRepair)
     , store(store)
+    , regexCache(makeRegexCache())
     , baseEnv(allocEnv(128))
     , staticBaseEnv(false, 0)
 {
diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh
index 80078d8a5..0e1f61baa 100644
--- a/src/libexpr/eval.hh
+++ b/src/libexpr/eval.hh
@@ -6,7 +6,6 @@
 #include "symbol-table.hh"
 #include "config.hh"
 
-#include <regex>
 #include <map>
 #include <optional>
 #include <unordered_map>
@@ -65,6 +64,11 @@ typedef std::list<SearchPathElem> SearchPath;
 void initGC();
 
 
+struct RegexCache; // opaque in this header; defined in primops.cc so that <regex> need not be included here
+
+std::shared_ptr<RegexCache> makeRegexCache(); // factory used to initialise EvalState::regexCache
+
+
 class EvalState
 {
 public:
@@ -120,7 +124,7 @@ private:
     std::unordered_map<Path, Path> resolvedPaths;
 
     /* Cache used by prim_match(). */
-    std::unordered_map<std::string, std::regex> regexCache;
+    std::shared_ptr<RegexCache> regexCache;
 
 public:
 
diff --git a/src/libexpr/flake/flakeref.cc b/src/libexpr/flake/flakeref.cc
index 6363446f6..d5c2ffe66 100644
--- a/src/libexpr/flake/flakeref.cc
+++ b/src/libexpr/flake/flakeref.cc
@@ -1,6 +1,7 @@
 #include "flakeref.hh"
 #include "store-api.hh"
 #include "url.hh"
+#include "url-parts.hh"
 #include "fetchers.hh"
 #include "registry.hh"
 
diff --git a/src/libexpr/flake/lockfile.cc b/src/libexpr/flake/lockfile.cc
index a74846944..78431f000 100644
--- a/src/libexpr/flake/lockfile.cc
+++ b/src/libexpr/flake/lockfile.cc
@@ -1,5 +1,6 @@
 #include "lockfile.hh"
 #include "store-api.hh"
+#include "url-parts.hh"
 
 #include <nlohmann/json.hpp>
 
diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc
index 7e8526ea1..9cfe3f402 100644
--- a/src/libexpr/primops.cc
+++ b/src/libexpr/primops.cc
@@ -3085,17 +3085,25 @@ static RegisterPrimOp primop_hashString({
     .fun = prim_hashString,
 });
 
-/* Match a regular expression against a string and return either
-   ‘null’ or a list containing substring matches. */
+struct RegexCache
+{
+    std::unordered_map<std::string, std::regex> cache; // regex source string -> compiled std::regex
+};
+
+std::shared_ptr<RegexCache> makeRegexCache()
+{
+    return std::make_shared<RegexCache>(); // a fresh, empty cache
+}
+
 void prim_match(EvalState & state, const Pos & pos, Value * * args, Value & v)
 {
     auto re = state.forceStringNoCtx(*args[0], pos);
 
     try {
 
-        auto regex = state.regexCache.find(re);
-        if (regex == state.regexCache.end())
-            regex = state.regexCache.emplace(re, std::regex(re, std::regex::extended)).first;
+        auto regex = state.regexCache->cache.find(re);
+        if (regex == state.regexCache->cache.end())
+            regex = state.regexCache->cache.emplace(re, std::regex(re, std::regex::extended)).first;
 
         PathSet context;
         const std::string str = state.forceString(*args[1], context, pos);
diff --git a/src/libexpr/primops/fetchMercurial.cc b/src/libexpr/primops/fetchMercurial.cc
index cef85cfef..1a064ed5c 100644
--- a/src/libexpr/primops/fetchMercurial.cc
+++ b/src/libexpr/primops/fetchMercurial.cc
@@ -3,8 +3,7 @@
 #include "store-api.hh"
 #include "fetchers.hh"
 #include "url.hh"
-
-#include <regex>
+#include "url-parts.hh"
 
 namespace nix {
 
diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc
index 5ca0f8521..ad7638d73 100644
--- a/src/libfetchers/git.cc
+++ b/src/libfetchers/git.cc
@@ -3,6 +3,7 @@
 #include "globals.hh"
 #include "tarfile.hh"
 #include "store-api.hh"
+#include "url-parts.hh"
 
 #include <sys/time.h>
 
diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc
index a4db5c5fa..1737658a7 100644
--- a/src/libfetchers/github.cc
+++ b/src/libfetchers/github.cc
@@ -3,6 +3,7 @@
 #include "fetchers.hh"
 #include "globals.hh"
 #include "store-api.hh"
+#include "url-parts.hh"
 
 #include <nlohmann/json.hpp>
 
diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc
index b981d4d8e..74332ae3d 100644
--- a/src/libfetchers/indirect.cc
+++ b/src/libfetchers/indirect.cc
@@ -1,4 +1,5 @@
 #include "fetchers.hh"
+#include "url-parts.hh"
 
 namespace nix::fetchers {
 
diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc
index 3e76ffc4d..d80c2ea7a 100644
--- a/src/libfetchers/mercurial.cc
+++ b/src/libfetchers/mercurial.cc
@@ -3,6 +3,7 @@
 #include "globals.hh"
 #include "tarfile.hh"
 #include "store-api.hh"
+#include "url-parts.hh"
 
 #include <sys/time.h>
 
diff --git a/src/libstore/names.cc b/src/libstore/names.cc
index d1c8a6101..41e28dc99 100644
--- a/src/libstore/names.cc
+++ b/src/libstore/names.cc
@@ -1,10 +1,18 @@
 #include "names.hh"
 #include "util.hh"
 
+#include <regex>
+
 
 namespace nix {
 
 
+struct Regex
+{
+    std::regex regex; // wrapped so that names.hh can forward-declare Regex instead of including <regex>
+};
+
+
 DrvName::DrvName()
 {
     name = "";
@@ -30,11 +38,18 @@ DrvName::DrvName(std::string_view s) : hits(0)
 }
 
 
+DrvName::~DrvName()
+{ } // defined out of line: std::unique_ptr<Regex> can only be destroyed where Regex is a complete type
+
+
 bool DrvName::matches(DrvName & n)
 {
     if (name != "*") {
-        if (!regex) regex = std::unique_ptr<std::regex>(new std::regex(name, std::regex::extended));
-        if (!std::regex_match(n.name, *regex)) return false;
+        if (!regex) {
+            /* Build the regex before storing it: if std::regex throws on a bad pattern, 'regex' stays null. */
+            regex = std::make_unique<Regex>(Regex{std::regex(name, std::regex::extended)});
+        }
+        if (!std::regex_match(n.name, regex->regex)) return false;
     }
     if (version != "" && version != n.version) return false;
     return true;
@@ -99,7 +114,7 @@ DrvNames drvNamesFromArgs(const Strings & opArgs)
 {
     DrvNames result;
     for (auto & i : opArgs)
-        result.push_back(DrvName(i));
+        result.emplace_back(i); // construct in place: with a user-declared destructor and a unique_ptr member, DrvName can be neither copied nor implicitly moved
     return result;
 }
 
diff --git a/src/libstore/names.hh b/src/libstore/names.hh
index 00e14b8c7..bc62aac93 100644
--- a/src/libstore/names.hh
+++ b/src/libstore/names.hh
@@ -3,10 +3,11 @@
 #include <memory>
 
 #include "types.hh"
-#include <regex>
 
 namespace nix {
 
+struct Regex;
+
 struct DrvName
 {
     string fullName;
@@ -16,10 +17,12 @@ struct DrvName
 
     DrvName();
     DrvName(std::string_view s);
+    ~DrvName();
+
     bool matches(DrvName & n);
 
 private:
-    std::unique_ptr<std::regex> regex;
+    std::unique_ptr<Regex> regex;
 };
 
 typedef list<DrvName> DrvNames;
diff --git a/src/libutil/url-parts.hh b/src/libutil/url-parts.hh
new file mode 100644
index 000000000..64e06cfbc
--- /dev/null
+++ b/src/libutil/url-parts.hh
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <string>
+#include <regex>
+
+namespace nix {
+
+// Regex source fragments for the components of a URI; the later ones are composed from the earlier ones.
+const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])";
+const static std::string schemeRegex = "(?:[a-z+.-]+)";
+const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])";
+const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])";
+const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])";
+const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)";
+const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")";
+const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)";
+const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?";
+const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])";
+const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*";
+const static std::string segmentRegex = "(?:" + pcharRegex + "+)";
+const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)";
+const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)";
+
+// A Git ref (i.e. branch or tag name).
+const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check
+extern std::regex refRegex;
+
+// Instead of defining what a good Git Ref is, we define what a bad Git Ref is.
+// This is because of the definition of a ref in refs.c in https://github.com/git/git
+// See tests/fetchGitRefs.sh for the full definition. ('[' needs no escape inside the bracket expression.)
+const static std::string badGitRefRegexS = "//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$";
+extern std::regex badGitRefRegex;
+
+// A Git revision (a SHA-1 commit hash).
+const static std::string revRegexS = "[0-9a-fA-F]{40}";
+extern std::regex revRegex;
+
+// A ref or revision, or a ref followed by a revision ("ref/rev"). Capture groups: 1 = lone rev, 2 = ref, 3 = rev after the ref.
+const static std::string refAndOrRevRegex = "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))";
+
+const static std::string flakeIdRegexS = "[a-zA-Z][a-zA-Z0-9_-]*"; // a flake ID: a letter, then letters, digits, '_' or '-'
+extern std::regex flakeIdRegex;
+
+}
diff --git a/src/libutil/url.cc b/src/libutil/url.cc
index 88c09eef9..c1bab866c 100644
--- a/src/libutil/url.cc
+++ b/src/libutil/url.cc
@@ -1,4 +1,5 @@
 #include "url.hh"
+#include "url-parts.hh"
 #include "util.hh"
 
 namespace nix {
diff --git a/src/libutil/url.hh b/src/libutil/url.hh
index 1f716ba10..6e77142e3 100644
--- a/src/libutil/url.hh
+++ b/src/libutil/url.hh
@@ -2,8 +2,6 @@
 
 #include "error.hh"
 
-#include <regex>
-
 namespace nix {
 
 struct ParsedURL
@@ -29,40 +27,4 @@ std::map<std::string, std::string> decodeQuery(const std::string & query);
 
 ParsedURL parseURL(const std::string & url);
 
-// URI stuff.
-const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])";
-const static std::string schemeRegex = "(?:[a-z+.-]+)";
-const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])";
-const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])";
-const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])";
-const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)";
-const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")";
-const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)";
-const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?";
-const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])";
-const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*";
-const static std::string segmentRegex = "(?:" + pcharRegex + "+)";
-const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)";
-const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)";
-
-// A Git ref (i.e. branch or tag name).
-const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check
-extern std::regex refRegex;
-
-// Instead of defining what a good Git Ref is, we define what a bad Git Ref is
-// This is because of the definition of a ref in refs.c in https://github.com/git/git
-// See tests/fetchGitRefs.sh for the full definition
-const static std::string badGitRefRegexS = "//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$";
-extern std::regex badGitRefRegex;
-
-// A Git revision (a SHA-1 commit hash).
-const static std::string revRegexS = "[0-9a-fA-F]{40}";
-extern std::regex revRegex;
-
-// A ref or revision, or a ref followed by a revision.
-const static std::string refAndOrRevRegex = "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))";
-
-const static std::string flakeIdRegexS = "[a-zA-Z][a-zA-Z0-9_-]*";
-extern std::regex flakeIdRegex;
-
 }
diff --git a/src/nix-env/nix-env.cc b/src/nix-env/nix-env.cc
index e5a433ac0..3e7c453fb 100644
--- a/src/nix-env/nix-env.cc
+++ b/src/nix-env/nix-env.cc
@@ -230,7 +230,7 @@ static DrvInfos filterBySelector(EvalState & state, const DrvInfos & allElems,
 {
     DrvNames selectors = drvNamesFromArgs(args);
     if (selectors.empty())
-        selectors.push_back(DrvName("*"));
+        selectors.emplace_back("*");
 
     DrvInfos elems;
     set<unsigned int> done;
-- 
GitLab