Program Listing for File pare_keyname_filename.hpp¶
↰ Return to documentation for file (include/dish2/utility/pare_keyname_filename.hpp)
#pragma once
#ifndef DISH2_UTILITY_PARE_KEYNAME_FILENAME_HPP_INCLUDE
#define DISH2_UTILITY_PARE_KEYNAME_FILENAME_HPP_INCLUDE
#include <cstdio>
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <string>
#include <utility>
#include <unistd.h>
#include "../../../third-party/conduit/include/uitsl/debug/safe_compare.hpp"
#include "../../../third-party/conduit/include/uitsl/polyfill/erase_if.hpp"
#include "../../../third-party/conduit/include/uitsl/polyfill/filesystem.hpp"
#include "../../../third-party/Empirical/include/emp/base/optional.hpp"
#include "../../../third-party/Empirical/include/emp/base/vector.hpp"
#include "../../../third-party/Empirical/include/emp/tools/keyname_utils.hpp"
#include "../../../third-party/Empirical/include/emp/tools/string_utils.hpp"
#include "sha256_reduce.hpp"
#include "strlen.hpp"
#include "to_alnum.hpp"
namespace dish2 {
namespace internal {
using kv_t = std::pair<std::string, std::string>;
constexpr const char* longlink_suffix = "@longlink";
constexpr const char* meta_suffix = ".meta";
size_t get_longlinked_filename_max( const std::filesystem::path& path ) {
constexpr size_t reserved
= dish2::strlen(longlink_suffix) + dish2::strlen(meta_suffix);
emp_assert( uitsl::safe_greater(
pathconf( path.c_str(), _PC_NAME_MAX ), reserved
) );
return pathconf( path.c_str(), _PC_NAME_MAX ) - reserved;
}
std::string strip_underscore_keys( const std::string& filename ){
auto attrs = emp::keyname::unpack( filename );
std::erase_if( attrs, []( const auto& kv ){
return kv.first.size() && kv.first[0] == '_';
} );
return emp::keyname::pack( attrs );
}
emp::optional<std::string> longlink_longest_value(
const std::string& filename, const bool try_a=false
) {
// get the key longest value
auto attrs = emp::keyname::unpack( filename );
if ( !try_a ) attrs.erase( "a" );
attrs.erase( "_" );
attrs.erase( "ext" );
// exclude existing longlinks
std::erase_if(
attrs, []( const auto& kv ){ return kv.second.rfind( "longlink@" ) == 0; }
);
const auto& [key, longest_value] = *std::max_element(
std::begin( attrs ), std::end( attrs ),
[]( const auto& left, const auto& right ){
return left.second.size() < right.second.size();
}
);
const std::string candidate_replacement = emp::to_string(
"longlink@", dish2::to_alnum(dish2::sha256_reduce( longest_value ))
);
if ( candidate_replacement.size() < longest_value.size() ) {
auto out_attrs = emp::keyname::unpack( filename );
out_attrs[ key ] = candidate_replacement;
return emp::keyname::pack( out_attrs );
} else if ( !try_a ) return longlink_longest_value(filename, true);
else return std::nullopt;
}
std::string longlink_entire_filename(
const std::string& filename,
const std::filesystem::path& path,
const bool try_a=true
) {
auto attrs = emp::keyname::unpack( filename );
std::erase_if( attrs, [try_a]( const auto& kv ){
const auto& [k, v] = kv;
const bool keep = k == "ext" || (try_a && k == "a");
return !keep;
});
const std::string uid = dish2::to_alnum( dish2::sha256_reduce(filename) );
attrs["longlink"] = uid;
const auto res = emp::keyname::pack( attrs );
if ( res.size() <= internal::get_longlinked_filename_max( path ) ) return res;
else if ( try_a ) return longlink_entire_filename( filename, path, false );
else return emp::keyname::pack({
{"a", uid},
{"ext", ".longlinked"}
});
}
} // namespace internal
std::filesystem::path make_longlinked_metadata_path( std::filesystem::path p ) {
p += internal::longlink_suffix;
emp_assert(
uitsl::safe_leq(
p.filename().string().size(),
pathconf(std::filesystem::absolute(p).parent_path().c_str(), _PC_NAME_MAX)
),
p.filename(), p.filename().string().size(),
p, std::filesystem::absolute(p).parent_path(),
pathconf( std::filesystem::absolute(p).parent_path().c_str(), _PC_NAME_MAX )
);
return p;
}
std::string pare_keyname_filename(
const std::string& original_filename, const std::filesystem::path& path="./"
) {
emp_assert( std::filesystem::exists( path ) );
const size_t filename_max = pathconf( path.c_str(), _PC_NAME_MAX );
const size_t longlinked_filename_max
= internal::get_longlinked_filename_max( path );
const size_t projected_size
= original_filename.size() + std::strlen(internal::meta_suffix);
if ( projected_size <= filename_max ) return original_filename;
std::string filename = internal::strip_underscore_keys( original_filename );
while ( filename.size() > longlinked_filename_max ) {
const auto res = internal::longlink_longest_value( filename );
if ( res ) filename = *res;
else filename = internal::longlink_entire_filename(original_filename, path);
}
emp_assert( filename.size() <= internal::get_longlinked_filename_max(path) );
std::cout << "paring filename " << original_filename
<< " -> " << filename << std::endl;
// create additional suffixed-file at path/ containing original filename
const auto meta_path = dish2::make_longlinked_metadata_path(path / filename);
std::ofstream( meta_path ) << original_filename << std::endl;
emp_assert( std::filesystem::exists( meta_path ) );
// return shortened filename
emp_assert( filename.size() <= filename_max );
return filename;
}
} // namespace dish2
#endif // #ifndef DISH2_UTILITY_PARE_KEYNAME_FILENAME_HPP_INCLUDE