forked from moodle/moodle
-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter.php
177 lines (154 loc) · 7.3 KB
/
filter.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
<?php
// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
* Filter converting URLs in the text to HTML links
*
* @package filter
* @subpackage urltolink
* @copyright 2010 David Mudrak <david@moodle.com>
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/
// Stop immediately unless the MOODLE_INTERNAL constant has been defined by whatever
// included this file (presumably to prevent the script from being requested directly).
defined('MOODLE_INTERNAL') || die();
/**
 * Text filter that turns plain-text URLs into clickable HTML links.
 *
 * Links are only created for text formats enabled in the "formats" setting of
 * the filter_urltolink plugin. Optionally, links pointing to images are turned
 * into embedded images (the "embedimages" setting).
 */
class filter_urltolink extends moodle_text_filter {

    /**
     * @var array global configuration for this filter
     *
     * This might eventually be moved into the parent class if it proves
     * useful for other filters, too.
     */
    protected static $globalconfig;

    /**
     * Apply the filter to the text
     *
     * @see filter_manager::apply_filter_chain()
     * @param string $text text to be processed by the filter
     * @param array $options filter options; nothing is done unless 'originalformat' is set
     * @return string text after processing
     */
    public function filter($text, array $options = array()) {
        if (!isset($options['originalformat'])) {
            // If the format is not specified, we are probably called by {@see format_string()}.
            // In that case it would be dangerous to replace the URL with a link because it
            // could be stripped. Therefore, we do nothing.
            return $text;
        }

        // The "formats" setting is a comma-separated list. The comparison is deliberately
        // loose: the exploded values are strings while the format passed in is presumably
        // an integer constant.
        $enabledformats = explode(',', (string) get_config('filter_urltolink', 'formats'));
        if (in_array($options['originalformat'], $enabledformats)) {
            $this->convert_urls_into_links($text);
        }

        return $text;
    }

    ////////////////////////////////////////////////////////////////////////////
    // internal implementation starts here
    ////////////////////////////////////////////////////////////////////////////

    /**
     * Given some text this function converts any URLs it finds into HTML links
     *
     * Content of existing links and of <span class="nolink"> elements is left alone,
     * as are HTML tags themselves and text chunks that look like HTML attributes or
     * CSS url() properties. If a regular expression operation fails, the affected
     * piece of text is kept unchanged rather than being dropped.
     *
     * @param string $text Passed in by reference. The string to be searched for urls.
     */
    protected function convert_urls_into_links(&$text) {
        // Existing links and "nolink" spans must not be touched. filter_save_ignore_tags()
        // is expected to record them in $ignoretags (placeholder => original markup); they
        // are put back at the end of this method.
        // Historical note: the original author mentioned img tags as being ignored here
        // (see MDL-21168), but the current list only covers links and nolink spans.
        // A better way to ignore tags whether or not they are escaped partially or
        // completely would be desirable.
        $filterignoretagsopen = array('<a\s[^>]+?>', '<span[^>]+?class="nolink"[^>]*?>');
        $filterignoretagsclose = array('</a>', '</span>');
        $ignoretags = [];
        filter_save_ignore_tags($text, $filterignoretagsopen, $filterignoretagsclose, $ignoretags);

        // Check if we support unicode modifiers in regular expressions. Cache it.
        // TODO: this check should be a environment requirement in Moodle 2.0, as far as unicode
        // chars are going to arrive to URLs officially really soon (2010?)
        // Original RFC regex from: http://www.bytemycode.com/snippets/snippet/796/
        // Various ideas from: http://alanstorm.com/url_regex_explained
        // Unicode check, negative assertion and other bits from Moodle.
        static $unicoderegexp;
        if (!isset($unicoderegexp)) {
            // The error is suppressed on purpose: without unicode support in PCRE this
            // fails silently, returning false, and the ASCII-only fallback below is used.
            $unicoderegexp = @preg_match('/\pL/u', 'a');
        }

        // TODO MDL-21296 - use of unicode modifiers may cause a timeout
        $urlstart = '(?:http(s)?://|(?<!://)(www\.))';
        $domainsegment = '(?:[\pLl0-9][\pLl0-9-]*[\pLl0-9]|[\pLl0-9])';
        $numericip = '(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3})';
        $port = '(?::\d*)';
        $pathchar = '(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})';
        $path = '(?:/' . $pathchar . '*)*';
        $querystring = '(?:\?(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
        $fragment = '(?:\#(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';

        // Lookbehind assertion at the end of the URL: the match must not finish with
        // one of the characters ] ) , . ; so trailing punctuation stays outside the link.
        $lookbehindend = '(?<![]),.;])';

        // Capture groups: 1 = "s" of https, 2 = "www.", 3 = host, 4 = port/path/query/fragment.
        $regex = $urlstart . '((?:' . $domainsegment . '\.)+' . $domainsegment . '|' . $numericip . ')' .
                '(' . $port . '?' . $path . $querystring . '?' . $fragment . '?)' . $lookbehindend;

        if ($unicoderegexp) {
            $regex = '#' . $regex . '#ui';
        } else {
            // Plain string replacement: the unicode letter escapes are literals here, not
            // patterns (a PCRE pattern cannot use a backslash as its delimiter). The longer
            // token is listed first so "\pLl" is not left with a stray "l".
            $regex = '#' . str_replace(['\pLl', '\pL'], 'a-z', $regex) . '#i';
        }

        $replacement = '<a href="http$1://$2$3$4" class="_blanktarget">$0</a>';

        // Locate any HTML tags.
        $chunks = preg_split('/(<[^<|>]*>)/i', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

        // If the split failed there is nothing safe to iterate over; keep the text as is.
        if (is_array($chunks)) {
            // Iterate through the tokenized text to handle chunks (html and content).
            foreach ($chunks as $idx => $chunk) {
                // Nothing to do. We skip completely any html chunk.
                if (strpos(trim($chunk), '<') === 0) {
                    continue;
                }

                // Nothing to do. We skip any content chunk having any of these attributes.
                if (preg_match('#(background=")|(action=")|(style="background)|(href=")|(src=")|(url [(])#', $chunk)) {
                    continue;
                }

                // Arrived here, we want to process every word in this chunk.
                $words = explode(' ', $chunk);
                foreach ($words as $idx2 => $word) {
                    // ReDoS protection. Do not process a word that is too large.
                    if (strlen($word) >= 4096) {
                        continue;
                    }
                    $linked = preg_replace($regex, $replacement, $word);
                    // preg_replace() returns null on error; keep the original word then
                    // instead of silently removing it from the output.
                    if ($linked !== null) {
                        $words[$idx2] = $linked;
                    }
                }

                // Copy the result back to the array.
                $chunks[$idx] = implode(' ', $words);
            }

            $text = implode('', $chunks);
        }

        if (!empty($ignoretags)) {
            // Reversed so "progressive" str_replace() will solve some nesting problems.
            $ignoretags = array_reverse($ignoretags);
            $text = str_replace(array_keys($ignoretags), $ignoretags, $text);
        }

        if (get_config('filter_urltolink', 'embedimages')) {
            // Now try to inject the images, this code was originally in the mediaplugin filter.
            // This may be useful only if somebody relies on the fact the links in FORMAT_MOODLE
            // get converted to URLs which in turn change to real images.
            $search = '/<a href="([^"]+\.(jpg|png|gif))" class="_blanktarget">([^>]*)<\/a>/is';
            $embedded = preg_replace_callback($search, 'filter_urltolink_img_callback', $text);
            // Keep the linked text if the replacement failed (null) rather than wiping it.
            if ($embedded !== null) {
                $text = $embedded;
            }
        }
    }
}
/**
 * Replace a link to an image with the embedded image itself.
 *
 * Used as a preg_replace_callback() callback; intended for automatic conversion
 * of image URLs when FORMAT_MOODLE is used.
 *
 * @param array $link regex match: [0] the whole anchor element, [1] the href value,
 *                    [3] the text between the anchor tags
 * @return string an img element when the anchor text equals its href, otherwise
 *                the anchor exactly as it was matched
 */
function filter_urltolink_img_callback($link) {
    $href = $link[1];
    $linktext = $link[3];

    if ($href === $linktext) {
        // The visible text is the URL itself, so the link is one generated by this filter.
        return '<img class="filter_urltolink_image" alt="" src="' . $href . '" />';
    }

    // The url does not match the text: this link was not created by this filter, leave it.
    return $link[0];
}