1: <?php
2:
3: /*
4: * Note: Zenphoto does not want html entities encoded. This script has been modified
5: * to prevent the encodings. Search for Zenphoto for changes.
6: */
7:
8: # kses 0.2.2 - HTML/XHTML filter that only allows some elements and attributes
9: # Copyright (C) 2002, 2003, 2005 Ulf Harnhammar
10: #
11: # This program is free software and open source software; you can redistribute
12: # it and/or modify it under the terms of the GNU General Public License as
13: # published by the Free Software Foundation; either version 2 of the License,
14: # or (at your option) any later version.
15: #
16: # This program is distributed in the hope that it will be useful, but WITHOUT
17: # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
18: # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
19: # more details.
20: #
21: # You should have received a copy of the GNU General Public License along
22: # with this program; if not, write to the Free Software Foundation, Inc.,
23: # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA or visit
24: # http://www.gnu.org/licenses/gpl.html
25: #
26: # *** CONTACT INFORMATION ***
27: #
28: # E-mail: metaur at users dot sourceforge dot net
29: # Web page: http://sourceforge.net/projects/kses
30: # Paper mail: Ulf Harnhammar
31: # Ymergatan 17 C
32: # 753 25 Uppsala
33: # SWEDEN
34: #
35: # [kses strips evil scripts!]
36:
37:
function kses($string, $allowed_html, $allowed_protocols =
               array('http', 'https', 'ftp', 'news', 'nntp', 'telnet',
                     'gopher', 'mailto'))
###############################################################################
# Main entry point of the filter. Cleans $string so that only the elements and
# attributes listed in $allowed_html survive, and attribute values may only
# start with one of the URL schemes in $allowed_protocols. Slashes added by
# PHP's magic quotes must be removed by the caller before calling this.
#
# Zenphoto changes: kses_normalize_entities() is not applied, because Zenphoto
# does not want "&" encoded, and kses_array_lc() is not applied, because
# Zenphoto ensures the keys of $allowed_html are already lower case.
###############################################################################
{
  # Pre-filters run innermost first: NULs, then JavaScript entities, then hooks.
  $prepared = kses_hook(kses_js_entities(kses_no_null($string)));

  return kses_split($prepared, $allowed_html, $allowed_protocols);
} # function kses
55:
56:
function kses_hook($string)
###############################################################################
# Extension point called by kses() before the tag splitting starts. Put any
# custom pre-processing of $string here. As shipped it changes nothing.
###############################################################################
{
  $hooked = $string; # no hooks installed

  return $hooked;
} # function kses_hook
64:
65:
function kses_version()
###############################################################################
# Reports the version number of kses as a string.
###############################################################################
{
  $version = '0.2.2';

  return $version;
} # function kses_version
73:
74:
function kses_split($string, $allowed_html, $allowed_protocols)
###############################################################################
# Finds everything in $string that looks like an HTML tag, however malformed,
# as well as stray ">" characters, and lets kses_split2() decide what each
# match is replaced with.
###############################################################################
{
  global $_allowed_html, $_allowed_protocols;

  # Zenphoto: the "e" modifier of preg_replace is deprecated, so a callback is
  # used. The callback only receives the match, so the whitelists are handed
  # over through these globals.
  $_allowed_protocols = $allowed_protocols;
  $_allowed_html = $allowed_html;

  # EITHER "<", things that aren't ">", then ">" or end of string
  # OR just a ">"
  $tag_or_stray_gt = '%(<[^>]*(>|$)|>)%';

  return preg_replace_callback($tag_or_stray_gt, 'kses_split2', $string);
} # function kses_split
93:
94:
function kses_split2($matches)
###############################################################################
# Callback for kses_split(). $matches[1] is either a stray ">" or something
# that starts with "<". A stray ">" is returned as ">". Text that does not
# have the shape of a tag (<:::> for instance) is returned unchanged. A tag
# whose element is not in the whitelist is replaced by an empty string (look
# ma, no strip_tags()!). A closing tag is rebuilt without any attributes. Any
# other tag is handed to kses_attr() as element name plus attribute list.
#
# NOTE(review): a "<tag ..." that runs to the end of the string without a
# closing ">" also fails the shape test below and is returned unchanged -
# confirm that this is intended.
###############################################################################
{
  # Zenphoto: the "e" modifier of preg_replace is deprecated, this is the
  # callback. The whitelists arrive through the globals set by kses_split().
  global $_allowed_html, $_allowed_protocols;

  $tag = kses_stripslashes($matches[1]);

  if (substr($tag, 0, 1) != '<') {
    # It matched a ">" character
    return '>';
  }

  $parts = array();
  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>$%', $tag, $parts)) {
    # It's seriously malformed
    return $tag;
  }

  $closing = trim($parts[1]);
  $name = $parts[2];

  if (!isset($_allowed_html[strtolower($name)])) {
    # They are using a not allowed HTML element
    return '';
  }

  if ($closing != '') {
    # No attributes are allowed for closing elements
    return "<$closing$name>";
  }

  # $closing is empty here, so the element is passed on by name alone
  return kses_attr($name, $parts[3], $_allowed_html, $_allowed_protocols);
} # function kses_split2
133:
134:
function kses_attr($element, $attr, $allowed_html, $allowed_protocols)
###############################################################################
# Rebuilds the opening tag for $element from the raw attribute list $attr.
# If no attributes are allowed for the element, all of them are dropped.
# Otherwise kses_hair() splits the list, and every attribute that is in the
# whitelist and passes its checks (see kses_check_attr_val()) is kept. Any "<"
# or ">" left in the kept attributes is removed. A closing XHTML slash at the
# end of $attr is carried over into the returned tag.
#
# $allowed_html maps lower case element names to arrays that map lower case
# attribute names either to a non-array value (no checks) or to an array of
# check name => check value pairs. Returns the rebuilt tag as a string.
###############################################################################
{
  $elem_key = strtolower($element);

  # Is there a closing XHTML slash at the end of the attributes?

  $xhtml_slash = preg_match('%\s/\s*$%', $attr) ? ' /' : '';

  # Are any attributes allowed at all for this element? count() on something
  # that is not countable throws a TypeError in PHP 8, which "@" cannot
  # silence, so the entry is inspected first. A missing or scalar entry can
  # never whitelist an attribute and is treated like an empty list.

  $allowed_attrs = isset($allowed_html[$elem_key]) ? $allowed_html[$elem_key]
                                                   : null;
  $countable = is_array($allowed_attrs) || $allowed_attrs instanceof Countable;

  if (!$countable || count($allowed_attrs) == 0)
    return "<$element$xhtml_slash>";

  # Split the list and keep the allowed attributes in $attr2

  $attr2 = '';

  foreach (kses_hair($attr, $allowed_protocols) as $arreach)
  {
    $attr_key = strtolower($arreach['name']);

    if (!isset($allowed_attrs[$attr_key]))
      continue; # the attribute is not allowed

    $checks = $allowed_attrs[$attr_key];
    $ok = true;

    if (is_array($checks))
    {
      # there are some checks, and all of them have to pass
      foreach ($checks as $currkey => $currval)
        if (!kses_check_attr_val($arreach['value'], $arreach['vless'],
                                 $currkey, $currval))
        { $ok = false; break; }
    }

    if ($ok)
      $attr2 .= ' '.$arreach['whole'];
  } # foreach

  # Remove any "<" or ">" characters

  $attr2 = preg_replace('/[<>]/', '', $attr2);

  return "<$element$attr2$xhtml_slash>";
} # function kses_attr
198:
199:
function kses_hair($attr, $allowed_protocols)
###############################################################################
# Splits the raw attribute list $attr into an array with one entry per
# attribute. Each entry has the keys 'name', 'value', 'whole' (the attribute
# as it should be written out) and 'vless' ('y' for a valueless attribute
# such as "selected", 'n' otherwise). Values without quotes or apostrophes
# around them get quotes in 'whole'. Every value is passed through
# kses_bad_protocol(), so disallowed URL protocols are removed from it. Input
# that can't be parsed is thrown away with kses_html_error() and parsing
# starts over with the next attribute name.
###############################################################################
{
  $found = array();
  $name = '';

  # 0 = expecting a name, 1 = expecting "=" or whitespace, 2 = expecting a value
  $state = 0;

  while (strlen($attr) != 0)
  {
    $progress = false; # did this round consume anything?
    $m = array();

    if ($state == 0)
    {
      # attribute name, href for instance
      if (preg_match('/^([-a-zA-Z]+)/', $attr, $m))
      {
        $name = $m[1];
        $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
        $state = 1;
        $progress = true;
      }
    }
    elseif ($state == 1)
    {
      if (preg_match('/^\s*=\s*/', $attr))
      {
        # equals sign, a value has to follow
        $attr = preg_replace('/^\s*=\s*/', '', $attr);
        $state = 2;
        $progress = true;
      }
      elseif (preg_match('/^\s+/', $attr))
      {
        # whitespace, so the name was a valueless attribute
        $found[] = array
          ('name'  => $name,
           'value' => '',
           'whole' => $name,
           'vless' => 'y');
        $attr = preg_replace('/^\s+/', '', $attr);
        $state = 0;
        $progress = true;
      }
    }
    elseif ($state == 2)
    {
      # attribute value, a URL after href= for instance
      if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $m))
      {
        # "value"
        $val = kses_bad_protocol($m[1], $allowed_protocols);
        $found[] = array
          ('name'  => $name,
           'value' => $val,
           'whole' => $name.'="'.$val.'"',
           'vless' => 'n');
        $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
        $state = 0;
        $progress = true;
      }
      elseif (preg_match("/^'([^']*)'(\s+|$)/", $attr, $m))
      {
        # 'value'
        $val = kses_bad_protocol($m[1], $allowed_protocols);
        $found[] = array
          ('name'  => $name,
           'value' => $val,
           'whole' => $name."='".$val."'",
           'vless' => 'n');
        $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
        $state = 0;
        $progress = true;
      }
      elseif (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $m))
      {
        # value without quotes; quotes are added in 'whole' to conform to
        # W3C's HTML spec
        $val = kses_bad_protocol($m[1], $allowed_protocols);
        $found[] = array
          ('name'  => $name,
           'value' => $val,
           'whole' => $name.'="'.$val.'"',
           'vless' => 'n');
        $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
        $state = 0;
        $progress = true;
      }
    }

    if (!$progress)
    {
      # not well formed, remove the offending piece and try again
      $attr = kses_html_error($attr);
      $state = 0;
    }
  } # while

  if ($state == 1)
  {
    # the list ended right after a name, so it is a valueless attribute
    $found[] = array
      ('name'  => $name,
       'value' => '',
       'whole' => $name,
       'vless' => 'y');
  }

  return $found;
} # function kses_hair
323:
324:
function kses_check_attr_val($value, $vless, $checkname, $checkvalue)
###############################################################################
# Runs one check on an attribute value and returns true if it passes.
# $checkname selects the check (case does not matter) and $checkvalue is its
# parameter. The checks are "maxlen", "minlen", "maxval", "minval" and
# "valueless". A check name that is not known always passes.
###############################################################################
{
  # up to six digits, with at most six whitespace characters on either side
  $small_uint = '/^\s{0,6}[0-9]{1,6}\s{0,6}$/';

  $check = strtolower($checkname);

  if ($check == 'maxlen')
  {
    # The value must not be longer than $checkvalue. This can be used to
    # avoid Buffer Overflows in WWW clients and various Internet servers.
    return !(strlen($value) > $checkvalue);
  }

  if ($check == 'minlen')
  {
    # The value must not be shorter than $checkvalue.
    return !(strlen($value) < $checkvalue);
  }

  if ($check == 'maxval')
  {
    # The value must be an integer from 0 and up, without an excessive amount
    # of zeroes or whitespace, and must not be greater than $checkvalue.
    # This can be used to avoid Denial of Service attacks.
    return preg_match($small_uint, $value) && !($value > $checkvalue);
  }

  if ($check == 'minval')
  {
    # The value must be an integer of the same form and must not be smaller
    # than $checkvalue.
    return preg_match($small_uint, $value) && !($value < $checkvalue);
  }

  if ($check == 'valueless')
  {
    # $vless is 'y' for an attribute without a value (<option selected>) and
    # 'n' for one with a value (<a href="blah">). It has to agree with
    # $checkvalue, which may be given in upper or lower case.
    return strtolower($checkvalue) == $vless;
  }

  return true;
} # function kses_check_attr_val
389:
390:
function kses_bad_protocol($string, $allowed_protocols)
###############################################################################
# Strips every protocol that is not in $allowed_protocols from the beginning
# of $string. NUL characters and \xad bytes are removed first. The actual
# stripping is done by kses_bad_protocol_once(), which is applied again and
# again until the string stops changing, so that a value like
# "javascript:javascript:alert(57)" does not get through.
###############################################################################
{
  $string = kses_no_null($string);
  $string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature"

  do
  {
    $before = $string;
    $string = kses_bad_protocol_once($before, $allowed_protocols);
  }
  while ($string != $before);

  return $string;
} # function kses_bad_protocol
411:
412:
function kses_no_null($string)
###############################################################################
# Removes NUL characters from $string, both real NUL bytes and the two
# character sequence backslash-zero.
###############################################################################
{
  # preg_replace() applies the patterns one after the other, in this order
  $nul_patterns = array('/\0+/', '/(\\\\0)+/');

  return preg_replace($nul_patterns, '', $string);
} # function kses_no_null
423:
424:
function kses_stripslashes($string)
###############################################################################
# Turns every \" in $string into a plain " and leaves all other backslashes
# alone. This dates from the time when the tags were split with
# preg_replace(//e), whose quoting seemed to require it.
###############################################################################
{
  $escaped_quote = '%\\\\"%';
  $plain_quote = '"';

  return preg_replace($escaped_quote, $plain_quote, $string);
} # function kses_stripslashes
434:
435:
function kses_array_lc($inarray)
###############################################################################
# Returns a copy of the two level array $inarray in which the keys of both
# levels are lower case. The values of the second level are copied as they
# are.
###############################################################################
{
  $lowered = array();

  foreach ($inarray as $outer_key => $inner)
  {
    $inner_lowered = array();

    foreach ($inner as $inner_key => $inner_val)
      $inner_lowered[strtolower($inner_key)] = $inner_val;

    $lowered[strtolower($outer_key)] = $inner_lowered;
  } # foreach $inarray

  return $lowered;
} # function kses_array_lc
457:
458:
function kses_js_entities($string)
###############################################################################
# Removes JavaScript entities, the &{...}; construct that early versions of
# Netscape 4 evaluated as script, from $string.
###############################################################################
{
  # "&", optional whitespace, "{", then everything up to the closing "}" (with
  # an optional ";" after it) or up to the end of the string
  $js_entity = '%&\s*\{[^}]*(\}\s*;?|$)%';

  return preg_replace($js_entity, '', $string);
} # function kses_js_entities
467:
468:
function kses_html_error($string)
###############################################################################
# Recovery step for kses_hair(): cuts the unparsable piece off the beginning
# of $string. Everything up to and including the next run of whitespace is
# removed. Whitespace inside quotes or apostrophes does not end the piece.
###############################################################################
{
  $in_quotes = '"[^"]*("|$)';
  $in_apostrophes = '\'[^\']*(\'|$)';

  $garbage = '/^('.$in_quotes.'|'.$in_apostrophes.'|\S)*\s*/';

  return preg_replace($garbage, '', $string);
} # function kses_html_error
478:
479:
function kses_bad_protocol_once($string, $allowed_protocols)
###############################################################################
# Looks for one URL protocol at the beginning of $string, while handling
# whitespace and HTML entities, and lets kses_bad_protocol_once2() decide
# whether it stays. The protocol is whatever precedes the first colon, where
# the colon may be written as ":", as the decimal entity "&#58;" or as the
# hexadecimal entity "&#x3A;". Returns $string with the protocol either
# normalized or removed. A string without a protocol is returned unchanged.
###############################################################################
{
  global $_allowed_protocols;

  # Zenphoto: the "e" modifier of preg_replace is deprecated, so a callback is
  # used. The callback only receives the match, so the whitelist is handed
  # over through this global.
  $_allowed_protocols = $allowed_protocols;

  # The decimal entity has to be listed as a separator, otherwise a value
  # like "javascript&#58;alert(1)" is not recognised as having a protocol and
  # passes through unfiltered. Leading zeroes are accepted in both entity
  # forms, because kses() does not run kses_normalize_entities() and they are
  # therefore not stripped beforehand.
  return preg_replace_callback('/^((&[^;]*;|[\sA-Za-z0-9])*)'.
                               '(:|&#0*58;|&#[Xx]0*3[Aa];)\s*/',
                               'kses_bad_protocol_once2',
                               $string);
} # function kses_bad_protocol_once
496:
497:
function kses_bad_protocol_once2($matches)
###############################################################################
# Callback for kses_bad_protocol_once(). $matches[1] is the text in front of
# the colon. It is decoded and cleaned up, and then looked up in the protocol
# whitelist without regard to case. An allowed protocol is returned in lower
# case with a colon after it, anything else becomes an empty string.
###############################################################################
{
  # Zenphoto: the "e" modifier of preg_replace is deprecated, this is the
  # callback. The whitelist arrives through the global set by
  # kses_bad_protocol_once().
  global $_allowed_protocols;

  $scheme = kses_decode_entities($matches[1]);
  $scheme = preg_replace('/\s/', '', $scheme);
  $scheme = kses_no_null($scheme);
  $scheme = preg_replace('/\xad+/', '', $scheme); # deals with Opera "feature"
  $scheme = strtolower($scheme);

  foreach ($_allowed_protocols as $candidate)
    if (strtolower($candidate) == $scheme)
      return "$scheme:";

  return '';
} # function kses_bad_protocol_once2
529:
530:
function kses_normalize_entities($string)
###############################################################################
# This function normalizes HTML entities. It will convert "AT&T" to the
# correct "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and
# so on. Returns the normalized string.
###############################################################################
{
  # Disarm all entities by converting & to &amp;

  $string = str_replace('&', '&amp;', $string);

  # Change back the allowed entities in our entity whitelist: named entities,
  # decimal entities (checked further by kses_normalize_entities2()) and
  # hexadecimal entities of two or four digits. Leading zeroes are dropped
  # from the numeric ones.

  $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/',
                         '&\\1;', $string);
  $string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/',
                                  'kses_normalize_entities2', $string);
  $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/',
                         '&#\\1\\2;', $string);

  return $string;
} # function kses_normalize_entities
552:
553:
function kses_normalize_entities2($matches)
###############################################################################
# This function helps kses_normalize_entities() to only accept 16 bit values
# and nothing more for &#number; entities. $matches[1] holds the digits of the
# entity. Returns the entity "&#number;" if the number fits into 16 bits, and
# the disarmed text "&amp;#number;" if it does not.
###############################################################################
{
  $code = $matches[1];

  return (($code > 65535) ? "&amp;#$code;" : "&#$code;");
} # function kses_normalize_entities2
562:
563:
function kses_decode_entities($string)
###############################################################################
# This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't
# do anything with other entities like &auml;, but we don't need them in the
# URL protocol whitelisting system anyway.
#
# Every entity becomes a single byte. chr() only uses the lowest eight bits
# of its argument, so larger numbers wrap around. Returns the decoded string.
###############################################################################
{
  # The replacements used to be PHP code for the "e" modifier. Without that
  # modifier preg_replace() inserts such code as literal text, so the
  # decoding is done in callbacks. intval() is used for the conversion,
  # because it clamps overlong digit strings instead of producing a float
  # that chr() rejects.

  $string = preg_replace_callback('/&#([0-9]+);/',
    function ($m) { return chr(intval($m[1], 10)); },
    $string);
  $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/',
    function ($m) { return chr(intval($m[1], 16)); },
    $string);

  return $string;
} # function kses_decode_entities
577:
578: ?>
579: