diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 139067f200b9fb441bfa8113d564d0cb3109a2d7..883fc27a7ea3e2829dfaf3ecf51934da36753b3f 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -356,6 +356,7 @@ EvalState::EvalState(const Strings & _searchPath, ref<Store> store) , sEpsilon(symbols.create("")) , repair(NoRepair) , store(store) + , regexCache(makeRegexCache()) /* start with an empty regex cache; see makeRegexCache() in primops.cc */ , baseEnv(allocEnv(128)) , staticBaseEnv(false, 0) { diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index 80078d8a5a6bbee21c93c211b621ac6a30311b91..0e1f61baab35d7f5c684584e97c0f190d2ac9f9c 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -6,7 +6,6 @@ #include "symbol-table.hh" #include "config.hh" -#include <regex> #include <map> #include <optional> #include <unordered_map> @@ -65,6 +64,11 @@ typedef std::list<SearchPathElem> SearchPath; void initGC(); +struct RegexCache; /* only declared here; the definition lives in primops.cc, so this header no longer includes <regex> */ + +std::shared_ptr<RegexCache> makeRegexCache(); /* returns a newly allocated, empty RegexCache */ + + class EvalState { public: @@ -120,7 +124,7 @@ private: std::unordered_map<Path, Path> resolvedPaths; /* Cache used by prim_match(). 
*/ - std::unordered_map<std::string, std::regex> regexCache; + std::shared_ptr<RegexCache> regexCache; public: diff --git a/src/libexpr/flake/flakeref.cc b/src/libexpr/flake/flakeref.cc index 6363446f6caad535aba82f2f95f53fea3a8cdf80..d5c2ffe66c883d732acffead15322302d6f50316 100644 --- a/src/libexpr/flake/flakeref.cc +++ b/src/libexpr/flake/flakeref.cc @@ -1,6 +1,7 @@ #include "flakeref.hh" #include "store-api.hh" #include "url.hh" +#include "url-parts.hh" #include "fetchers.hh" #include "registry.hh" diff --git a/src/libexpr/flake/lockfile.cc b/src/libexpr/flake/lockfile.cc index a748469442e38dc7ede9e55b74bb76469ea79f66..78431f000eae0cff8cc041b00350840de5e2cd3e 100644 --- a/src/libexpr/flake/lockfile.cc +++ b/src/libexpr/flake/lockfile.cc @@ -1,5 +1,6 @@ #include "lockfile.hh" #include "store-api.hh" +#include "url-parts.hh" #include <nlohmann/json.hpp> diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 7e8526ea135749ad5dd5aecabab630383fbd185e..9cfe3f402ff048c62d38ecd54fe58ff979cc4680 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -3085,17 +3085,25 @@ static RegisterPrimOp primop_hashString({ .fun = prim_hashString, }); -/* Match a regular expression against a string and return either - ‘null’ or a list containing substring matches. 
*/ +struct RegexCache /* compiled regexes keyed by their pattern string; filled lazily by prim_match() */ +{ + std::unordered_map<std::string, std::regex> cache; +}; + +std::shared_ptr<RegexCache> makeRegexCache() /* creates an empty RegexCache */ +{ + return std::make_shared<RegexCache>(); +} +/* Match the regular expression in args[0] against the string in args[1]. NOTE(review): the previous header comment (result is either null or a list of substring matches) is deleted by this change; confirm that contract is still documented somewhere. */ void prim_match(EvalState & state, const Pos & pos, Value * * args, Value & v) { auto re = state.forceStringNoCtx(*args[0], pos); try { - auto regex = state.regexCache.find(re); - if (regex == state.regexCache.end()) - regex = state.regexCache.emplace(re, std::regex(re, std::regex::extended)).first; + auto regex = state.regexCache->cache.find(re); + if (regex == state.regexCache->cache.end()) /* pattern not seen before: compile it with the POSIX extended grammar and remember it */ + regex = state.regexCache->cache.emplace(re, std::regex(re, std::regex::extended)).first; PathSet context; const std::string str = state.forceString(*args[1], context, pos); diff --git a/src/libexpr/primops/fetchMercurial.cc b/src/libexpr/primops/fetchMercurial.cc index cef85cfef48d032c33fd1dcd64d25c9c8ed9e871..1a064ed5caf39f9b96c6b58ce3103cba5f8562c5 100644 --- a/src/libexpr/primops/fetchMercurial.cc +++ b/src/libexpr/primops/fetchMercurial.cc @@ -3,8 +3,7 @@ #include "store-api.hh" #include "fetchers.hh" #include "url.hh" - -#include <regex> +#include "url-parts.hh" namespace nix { diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 5ca0f852143f8b62309f975a76c0a93dca887663..ad7638d732fe033e152f54a45f4758648e66f1c2 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -3,6 +3,7 @@ #include "globals.hh" #include "tarfile.hh" #include "store-api.hh" +#include "url-parts.hh" #include <sys/time.h> diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index a4db5c5fa6c47d77540c7c3edf35263c6a7bf28e..1737658a7de5a1ecbda973ef9ed5f8c8b71a35cf 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -3,6 +3,7 @@ #include "fetchers.hh" #include "globals.hh" #include "store-api.hh" +#include "url-parts.hh" #include <nlohmann/json.hpp> diff --git a/src/libfetchers/indirect.cc b/src/libfetchers/indirect.cc index 
b981d4d8ea68fd5b1d5f99bea14ccc35a51e91d2..74332ae3dda2b645b63f8a8310597220388c491e 100644 --- a/src/libfetchers/indirect.cc +++ b/src/libfetchers/indirect.cc @@ -1,4 +1,5 @@ #include "fetchers.hh" +#include "url-parts.hh" namespace nix::fetchers { diff --git a/src/libfetchers/mercurial.cc b/src/libfetchers/mercurial.cc index 3e76ffc4dd7c2db8afab3cf04db2db328d9360bd..d80c2ea7ad9c4899ca721a4c59672a0e0c5df226 100644 --- a/src/libfetchers/mercurial.cc +++ b/src/libfetchers/mercurial.cc @@ -3,6 +3,7 @@ #include "globals.hh" #include "tarfile.hh" #include "store-api.hh" +#include "url-parts.hh" #include <sys/time.h> diff --git a/src/libstore/names.cc b/src/libstore/names.cc index d1c8a6101f852cc48d99bf51633c84b16c3ea374..41e28dc9931085fb27ab6daede1d4ad1703b97bd 100644 --- a/src/libstore/names.cc +++ b/src/libstore/names.cc @@ -1,10 +1,18 @@ #include "names.hh" #include "util.hh" +#include <regex> + namespace nix { +struct Regex /* thin wrapper around std::regex, so names.hh can forward-declare it instead of including <regex> */ +{ + std::regex regex; +}; + + DrvName::DrvName() { name = ""; @@ -30,11 +38,18 @@ DrvName::DrvName(std::string_view s) : hits(0) } +DrvName::~DrvName() /* defined here, where Regex is a complete type, because DrvName holds a std::unique_ptr<Regex> */ +{ } + + bool DrvName::matches(DrvName & n) { if (name != "*") { - if (!regex) regex = std::unique_ptr<std::regex>(new std::regex(name, std::regex::extended)); - if (!std::regex_match(n.name, *regex)) return false; + if (!regex) { /* first call: compile name as a POSIX extended regex and keep it for later calls */ + regex = std::make_unique<Regex>(); + regex->regex = std::regex(name, std::regex::extended); + } + if (!std::regex_match(n.name, regex->regex)) return false; } if (version != "" && version != n.version) return false; return true; @@ -99,7 +114,7 @@ DrvNames drvNamesFromArgs(const Strings & opArgs) { DrvNames result; for (auto & i : opArgs) - result.push_back(DrvName(i)); + result.emplace_back(i); /* constructs the DrvName in place from the argument string */ return result; } diff --git a/src/libstore/names.hh b/src/libstore/names.hh index 00e14b8c79710de16e79089f629845b16ac3eb52..bc62aac93712871f88b3b00b55aa2579f015a62d 100644 --- a/src/libstore/names.hh +++ b/src/libstore/names.hh @@ -3,10 +3,11 @@ #include <memory> #include 
"types.hh" -#include <regex> namespace nix { +struct Regex; /* defined in names.cc; wraps a std::regex */ + struct DrvName { string fullName; @@ -16,10 +17,12 @@ struct DrvName DrvName(); DrvName(std::string_view s); + ~DrvName(); /* defined in names.cc */ + bool matches(DrvName & n); private: - std::unique_ptr<std::regex> regex; + std::unique_ptr<Regex> regex; /* compiled form of name; null until matches() first needs it */ }; typedef list<DrvName> DrvNames; diff --git a/src/libutil/url-parts.hh b/src/libutil/url-parts.hh new file mode 100644 index 0000000000000000000000000000000000000000..64e06cfbc8c7a64f1162229421c949b3bd4e82de --- /dev/null +++ b/src/libutil/url-parts.hh @@ -0,0 +1,44 @@ +#pragma once + +#include <string> +#include <regex> + +namespace nix { + +// URI stuff: regex fragments kept as strings so that larger patterns can be built by concatenation. +const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])"; +const static std::string schemeRegex = "(?:[a-z+.-]+)"; +const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])"; +const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])"; +const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])"; +const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)"; +const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")"; +const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)"; +const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?"; +const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])"; +const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*"; +const static std::string segmentRegex = "(?:" + pcharRegex + "+)"; +const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)"; +const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)"; + +// A Git ref (i.e. branch or tag name). 
+const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check +extern std::regex refRegex; /* defined in another translation unit; presumably compiled from refRegexS (confirm there) */ + +// Instead of defining what a good Git Ref is, we define what a bad Git Ref is +// This is because of the definition of a ref in refs.c in https://github.com/git/git +// See tests/fetchGitRefs.sh for the full definition +const static std::string badGitRefRegexS = "//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$"; /* NOTE(review): the backslash-bracket pair inside the character class is not a standard C++ escape sequence; its meaning is implementation-defined and compilers may warn. Confirm the intended pattern text. */ +extern std::regex badGitRefRegex; + +// A Git revision (a SHA-1 commit hash). +const static std::string revRegexS = "[0-9a-fA-F]{40}"; +extern std::regex revRegex; + +// A ref or revision, or a ref followed by a revision. +const static std::string refAndOrRevRegex = "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))"; + +const static std::string flakeIdRegexS = "[a-zA-Z][a-zA-Z0-9_-]*"; /* a letter followed by any number of letters, digits, '_' or '-' */ +extern std::regex flakeIdRegex; + +} diff --git a/src/libutil/url.cc b/src/libutil/url.cc index 88c09eef9d7743dd64977cd53a045b4bb5d5888d..c1bab866c1585f5089314f2508e0eb4e7e8ebdd3 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -1,4 +1,5 @@ #include "url.hh" +#include "url-parts.hh" #include "util.hh" namespace nix { diff --git a/src/libutil/url.hh b/src/libutil/url.hh index 1f716ba10ea5e86ef0e6fc97766667bc940be5c7..6e77142e35ac246869d5906da5e000ec3aaef20f 100644 --- a/src/libutil/url.hh +++ b/src/libutil/url.hh @@ -2,8 +2,6 @@ #include "error.hh" -#include <regex> - namespace nix { struct ParsedURL @@ -29,40 +27,4 @@ std::map<std::string, std::string> decodeQuery(const std::string & query); ParsedURL parseURL(const std::string & url); -// URI stuff. 
-const static std::string pctEncoded = "(?:%[0-9a-fA-F][0-9a-fA-F])"; -const static std::string schemeRegex = "(?:[a-z+.-]+)"; -const static std::string ipv6AddressRegex = "(?:\\[[0-9a-fA-F:]+\\])"; -const static std::string unreservedRegex = "(?:[a-zA-Z0-9-._~])"; -const static std::string subdelimsRegex = "(?:[!$&'\"()*+,;=])"; -const static std::string hostnameRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + ")*)"; -const static std::string hostRegex = "(?:" + ipv6AddressRegex + "|" + hostnameRegex + ")"; -const static std::string userRegex = "(?:(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|:)*)"; -const static std::string authorityRegex = "(?:" + userRegex + "@)?" + hostRegex + "(?::[0-9]+)?"; -const static std::string pcharRegex = "(?:" + unreservedRegex + "|" + pctEncoded + "|" + subdelimsRegex + "|[:@])"; -const static std::string queryRegex = "(?:" + pcharRegex + "|[/? \"])*"; -const static std::string segmentRegex = "(?:" + pcharRegex + "+)"; -const static std::string absPathRegex = "(?:(?:/" + segmentRegex + ")*/?)"; -const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRegex + ")*/?)"; - -// A Git ref (i.e. branch or tag name). -const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check -extern std::regex refRegex; - -// Instead of defining what a good Git Ref is, we define what a bad Git Ref is -// This is because of the definition of a ref in refs.c in https://github.com/git/git -// See tests/fetchGitRefs.sh for the full definition -const static std::string badGitRefRegexS = "//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$"; -extern std::regex badGitRefRegex; - -// A Git revision (a SHA-1 commit hash). -const static std::string revRegexS = "[0-9a-fA-F]{40}"; -extern std::regex revRegex; - -// A ref or revision, or a ref followed by a revision. 
-const static std::string refAndOrRevRegex = "(?:(" + revRegexS + ")|(?:(" + refRegexS + ")(?:/(" + revRegexS + "))?))"; - -const static std::string flakeIdRegexS = "[a-zA-Z][a-zA-Z0-9_-]*"; -extern std::regex flakeIdRegex; - } diff --git a/src/nix-env/nix-env.cc b/src/nix-env/nix-env.cc index e5a433ac013d97928068919e80fd5499b15edb24..3e7c453fb7f4bcb6df217a7ab63144875879c965 100644 --- a/src/nix-env/nix-env.cc +++ b/src/nix-env/nix-env.cc @@ -230,7 +230,7 @@ static DrvInfos filterBySelector(EvalState & state, const DrvInfos & allElems, { DrvNames selectors = drvNamesFromArgs(args); if (selectors.empty()) - selectors.push_back(DrvName("*")); + selectors.emplace_back("*"); DrvInfos elems; set<unsigned int> done;