forked from chromium/chromium
-
Notifications
You must be signed in to change notification settings - Fork 0
/
filename_generation.cc
187 lines (161 loc) · 6.89 KB
/
filename_generation.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/filename_generation/filename_generation.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/i18n/file_util_icu.h"
#include "base/logging.h"
#include "base/strings/string_util.h"
#include "base/strings/sys_string_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "base/third_party/icu/icu_utf.h"
#include "base/threading/thread_restrictions.h"
#include "build/build_config.h"
#include "components/url_formatter/url_formatter.h"
#include "net/base/filename_util.h"
#include "net/base/mime_util.h"
#include "url/gurl.h"
namespace filename_generation {
namespace {
// Smallest acceptable length for a truncated file name, not counting its
// extension. When truncating would leave fewer characters than this,
// automatic truncation is abandoned so that the user can be prompted instead.
constexpr size_t kTruncatedNameLengthLowerbound = 5;
// Extension (without the leading '.') that is suggested for "text/html"
// content and appended when a name must be given an HTML extension.
constexpr base::FilePath::CharType kDefaultHtmlExtension[] =
    FILE_PATH_LITERAL("html");
// Returns true when contents of the given MIME type can be saved as
// complete-HTML. Only "text/html" and "application/xhtml+xml" qualify; the
// comparison is exact (case-sensitive, no parameter stripping).
bool CanSaveAsComplete(const std::string& contents_mime_type) {
  if (contents_mime_type == "text/html")
    return true;
  return contents_mime_type == "application/xhtml+xml";
}
} // namespace
// Returns the file extension (without a leading '.') suggested for
// |contents_mime_type|, or an empty string literal when the MIME type is not
// in the table below. The MIME type is matched exactly.
const base::FilePath::CharType* ExtensionForMimeType(
    const std::string& contents_mime_type) {
  struct MimeTypeExtension {
    const char* mime_type;
    const base::FilePath::CharType* suggested_extension;
  };
  static const MimeTypeExtension kKnownMimeTypes[] = {
      {"text/html", kDefaultHtmlExtension},
      {"text/xml", FILE_PATH_LITERAL("xml")},
      {"application/xhtml+xml", FILE_PATH_LITERAL("xhtml")},
      {"text/plain", FILE_PATH_LITERAL("txt")},
      {"text/css", FILE_PATH_LITERAL("css")},
      {"multipart/related", FILE_PATH_LITERAL("mhtml")},
  };

  // Default result for MIME types that have no entry in the table.
  const base::FilePath::CharType* result = FILE_PATH_LITERAL("");
  for (const MimeTypeExtension& entry : kKnownMimeTypes) {
    if (contents_mime_type != entry.mime_type)
      continue;
    result = entry.suggested_extension;
    break;
  }
  return result;
}
// Returns |name| unchanged when its extension maps to a MIME type that can be
// saved as complete-HTML; otherwise returns |name| with ".html" appended (any
// existing extension is kept in front of it).
base::FilePath EnsureHtmlExtension(const base::FilePath& name) {
  base::AssertBlockingAllowed();

  // Extension of |name| with the leading '.' dropped, if there is one.
  base::FilePath::StringType extension = name.Extension();
  if (!extension.empty())
    extension.erase(0, 1);

  std::string extension_mime_type;
  const bool extension_recognized =
      net::GetMimeTypeFromExtension(extension, &extension_mime_type);
  if (extension_recognized && CanSaveAsComplete(extension_mime_type))
    return name;

  // Missing, unknown, or non-HTML extension: tack the HTML one on.
  return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
                        kDefaultHtmlExtension);
}
// Returns a file name based on |name| whose extension is appropriate for
// |contents_mime_type|:
//  - If an extension is suggested for |contents_mime_type| and the current
//    extension of |name| does not map to any MIME type, the suggested
//    extension is appended.
//  - If the contents are MHTML ("multipart/related") and the current
//    extension maps to some other MIME type, ".mhtml" is appended.
//  - Otherwise |name| is returned unchanged.
base::FilePath EnsureMimeExtension(const base::FilePath& name,
                                   const std::string& contents_mime_type) {
  base::AssertBlockingAllowed();

  // Current extension of |name| with the leading period dropped, if any.
  base::FilePath::StringType current_extension = name.Extension();
  if (!current_extension.empty())
    current_extension.erase(0, 1);

  const base::FilePath::StringType wanted_extension =
      ExtensionForMimeType(contents_mime_type);

  // MIME type implied by the current extension. The lookup is only performed
  // when there is a suggested extension; otherwise this stays empty.
  std::string extension_mime_type;
  if (!wanted_extension.empty()) {
    const bool extension_recognized = net::GetMimeTypeFromExtension(
        current_extension, &extension_mime_type);
    if (!extension_recognized) {
      // Extension is absent or needs to be updated.
      return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
                            wanted_extension);
    }
  }

  // Special treatment for MHTML: ".mhtml" is always added, even if the
  // current extension already maps to another recognized MIME type. For
  // example, "page.html" becomes "page.html.mhtml" rather than staying
  // "page.html".
  const bool saving_as_mhtml = contents_mime_type == "multipart/related";
  if (saving_as_mhtml && extension_mime_type != "multipart/related")
    return base::FilePath(name.value() + FILE_PATH_LITERAL(".mhtml"));

  return name;
}
// Builds a suggested file name for saving a page.
//
// The name is based on |title|, unless |title| equals the display-formatted
// |url| (i.e. the page effectively has no title), in which case the name is
// derived from |url| instead; data: URLs get the fixed name "dataurl". An
// extension consistent with |contents_mime_type| is then ensured and, when
// |can_save_as_complete| is true, an extension suitable for complete-HTML
// saving as well. Finally, characters that are illegal in a path are replaced
// with '_'.
base::FilePath GenerateFilename(const base::string16& title,
                                const GURL& url,
                                bool can_save_as_complete,
                                std::string contents_mime_type) {
  base::FilePath name_with_proper_ext = base::FilePath::FromUTF16Unsafe(title);

  // Normally we want to base the filename on the page title, or if it doesn't
  // exist, on the URL. It's not easy to tell if the page has no title, because
  // if the page has no title, WebContents::GetTitle() will return the page's
  // URL (adjusted for display purposes). Therefore, we format the URL the same
  // way and, if it matches the "title", we know the page had no title (or had
  // a title equal to its URL, which is fine to treat similarly). In that case
  // use the last path component or, if there is none, the domain as the file
  // name.
  if (title == url_formatter::FormatUrl(url)) {
    if (!url.SchemeIs(url::kDataScheme)) {
      name_with_proper_ext = net::GenerateFileName(
          url, std::string(), std::string(), std::string(), contents_mime_type,
          std::string());

      // If host is used as file name, try to decode punycode.
      if (name_with_proper_ext.AsUTF8Unsafe() == url.host()) {
        name_with_proper_ext = base::FilePath::FromUTF16Unsafe(
            url_formatter::IDNToUnicode(url.host()));
      }
    } else {
      name_with_proper_ext = base::FilePath::FromUTF8Unsafe("dataurl");
    }
  }

  // Make sure the extension agrees with the MIME type of the contents.
  name_with_proper_ext =
      EnsureMimeExtension(name_with_proper_ext, contents_mime_type);

  // Adjust extension for complete types.
  if (can_save_as_complete)
    name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext);

  // Sanitize the result so it is usable as a path component.
  base::FilePath::StringType file_name = name_with_proper_ext.value();
  base::i18n::ReplaceIllegalCharactersInPath(&file_name, '_');
  return base::FilePath(file_name);
}
// Shortens the base name of |*path| to fit within |limit| (measured with
// StringType::size()), keeping the directory and the extension intact.
//
// Returns true if the base name already fits or was successfully truncated
// (in which case |*path| is updated). Returns false, leaving |*path|
// untouched, when the extension leaves no room for a name of at least
// kTruncatedNameLengthLowerbound, or when the truncated name would be shorter
// than that. On platforms where the file name encoding is unknown, no
// truncation is attempted, so any over-long name yields false.
bool TruncateFilename(base::FilePath* path, size_t limit) {
  const base::FilePath leaf(path->BaseName());

  // Nothing to do when the base name already fits.
  if (leaf.value().size() <= limit)
    return true;

  const base::FilePath parent(path->DirName());
  const base::FilePath::StringType extension(leaf.Extension());
  const base::FilePath::StringType stem(leaf.RemoveExtension().value());

  // The extension is preserved, so it must fit next to a stem of at least the
  // minimum length; otherwise the limit is impossible to satisfy.
  if (limit < kTruncatedNameLengthLowerbound + extension.size())
    return false;

  // From here on |limit| is the budget for the stem alone.
  limit -= extension.size();

  // Encoding specific truncation logic.
  base::FilePath::StringType shortened;
#if defined(OS_CHROMEOS) || defined(OS_MACOSX)
  // UTF-8.
  base::TruncateUTF8ToByteSize(stem, limit, &shortened);
#elif defined(OS_WIN)
  // UTF-16. Back off by one unit if the cut would land on a trail surrogate,
  // so that a surrogate pair is never split.
  DCHECK(stem.size() > limit);
  const size_t cut = CBU16_IS_TRAIL(stem[limit]) ? limit - 1 : limit;
  shortened = stem.substr(0, cut);
#else
  // We cannot generally assume that the file name encoding is in UTF-8 (see
  // the comment for FilePath::AsUTF8Unsafe), hence no safe way to truncate.
#endif

  if (shortened.size() >= kTruncatedNameLengthLowerbound) {
    *path = parent.Append(shortened + extension);
    return true;
  }
  return false;
}
} // namespace filename_generation