1 
2 #include "EXTERN.h"
3 #include "perl.h"
4 #include "XSUB.h"
5 
6 /* These 5 files are prepared by mkheader */
7 #include "unfcmb.h"
8 #include "unfcan.h"
9 #include "unfcpt.h"
10 #include "unfcmp.h"
11 #include "unfexc.h"
12 
/* Fall back to the older converter names when the current ones are not
 * provided as macros (Perl 5.6.1 ?). */
#if !defined(uvuni_to_utf8)
#  define uvuni_to_utf8   uv_to_utf8
#endif

#if !defined(utf8n_to_uvuni)
#  define utf8n_to_uvuni  utf8_to_uv
#endif

/* Flags passed to utf8n_to_uvuni() by every decoder call in this file.
 * UTF8_ALLOW_BOM is used before Perl 5.8.0. */
#if defined(UTF8_ALLOW_BOM)
#  define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FFFF)
#else
#  define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_FFFF)
#endif

/* croak() message: if utf8n_to_uvuni() sets retlen to 0 (?) */
#define ErrRetlenIsZero "panic (Unicode::Normalize): zero-length character"

/* croak() message: utf8_hop() hops back before start. Maybe broken UTF-8 */
#define ErrHopBeforeStart "panic (Unicode::Normalize): hopping before start"

/* Code points above 0x10ffff are left untouched, without complaint. */
#define VALID_UTF_MAX    (0x10ffff)
#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv))
39 
/* HANGUL_H -- constants and range tests for Hangul syllables and jamo */

/* leading consonants (L) */
#define Hangul_LBase  0x1100
#define Hangul_LFinal 0x1112
#define Hangul_LCount     19

/* vowels (V) */
#define Hangul_VBase  0x1161
#define Hangul_VFinal 0x1175
#define Hangul_VCount     21

/* trailing consonants (T); TBase itself is not a T, see Hangul_IsT() */
#define Hangul_TBase  0x11A7
#define Hangul_TFinal 0x11C2
#define Hangul_TCount     28

/* syllables sharing one leading consonant: 21 * 28 = 588 */
#define Hangul_NCount (Hangul_VCount * Hangul_TCount)

/* precomposed syllables (S): 19 * 588 = 11172 */
#define Hangul_SBase  0xAC00
#define Hangul_SFinal 0xD7A3
#define Hangul_SCount (Hangul_LCount * Hangul_NCount)

#define Hangul_IsS(u)  (((u) >= Hangul_SBase) && ((u) <= Hangul_SFinal))
/* true when the offset from SBase is a multiple of TCount (no trailing part) */
#define Hangul_IsN(u)  ((((u) - Hangul_SBase) % Hangul_TCount) == 0)
#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u))
#define Hangul_IsL(u)  (((u) >= Hangul_LBase) && ((u) <= Hangul_LFinal))
#define Hangul_IsV(u)  (((u) >= Hangul_VBase) && ((u) <= Hangul_VFinal))
/* strictly above TBase: TBase corresponds to tindex 0, i.e. "no T" */
#define Hangul_IsT(u)  (((u) >  Hangul_TBase) && ((u) <= Hangul_TFinal))
/* HANGUL_H */
66 
67 /* this is used for canonical ordering of combining characters (c.c.). */
68 typedef struct {
69     U8 cc;	/* combining class */
70     UV uv;	/* codepoint */
71     STRLEN pos; /* position */
72 } UNF_cc;
73 
compare_cc(const void * a,const void * b)74 static int compare_cc (const void *a, const void *b)
75 {
76     int ret_cc;
77     ret_cc = ((UNF_cc*) a)->cc - ((UNF_cc*) b)->cc;
78     if (ret_cc)
79 	return ret_cc;
80 
81     return ( ((UNF_cc*) a)->pos > ((UNF_cc*) b)->pos )
82 	 - ( ((UNF_cc*) a)->pos < ((UNF_cc*) b)->pos );
83 }
84 
dec_canonical(UV uv)85 static U8* dec_canonical (UV uv)
86 {
87     U8 ***plane, **row;
88     if (OVER_UTF_MAX(uv))
89 	return NULL;
90     plane = (U8***)UNF_canon[uv >> 16];
91     if (! plane)
92 	return NULL;
93     row = plane[(uv >> 8) & 0xff];
94     return row ? row[uv & 0xff] : NULL;
95 }
96 
dec_compat(UV uv)97 static U8* dec_compat (UV uv)
98 {
99     U8 ***plane, **row;
100     if (OVER_UTF_MAX(uv))
101 	return NULL;
102     plane = (U8***)UNF_compat[uv >> 16];
103     if (! plane)
104 	return NULL;
105     row = plane[(uv >> 8) & 0xff];
106     return row ? row[uv & 0xff] : NULL;
107 }
108 
/*
 * Returns the code point obtained by composing uv with the following
 * code point uv2, or 0 when the pair does not compose.
 *
 * Hangul L+V and LV+T pairs are composed arithmetically; every other
 * pair is searched in the UNF_compos table, whose cell for uv is a list
 * of (nextchar, composite) entries terminated by nextchar == 0.
 * A zero uv2 or an argument above VALID_UTF_MAX never composes.
 */
static UV composite_uv (UV uv, UV uv2)
{
    UNF_complist ***plane;
    UNF_complist **row;
    UNF_complist *entry;

    if (uv2 == 0 || OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2))
        return 0;

    /* leading consonant + vowel -> LV syllable */
    if (Hangul_IsL(uv) && Hangul_IsV(uv2)) {
        UV lindex = uv  - Hangul_LBase;
        UV vindex = uv2 - Hangul_VBase;
        return Hangul_SBase + (lindex * Hangul_VCount + vindex) * Hangul_TCount;
    }

    /* LV syllable + trailing consonant -> LVT syllable */
    if (Hangul_IsLV(uv) && Hangul_IsT(uv2)) {
        UV tindex = uv2 - Hangul_TBase;
        return uv + tindex;
    }

    plane = UNF_compos[uv >> 16];
    if (plane) {
        row = plane[(uv >> 8) & 0xff];
        if (row) {
            entry = row[uv & 0xff];
            if (entry) {
                /* the list ends at the entry whose nextchar is 0 */
                for (; entry->nextchar; entry++) {
                    if (entry->nextchar == uv2)
                        return entry->composite;
                }
            }
        }
    }
    return 0;
}
140 
getCombinClass(UV uv)141 static U8 getCombinClass (UV uv)
142 {
143     U8 **plane, *row;
144     if (OVER_UTF_MAX(uv))
145 	return 0;
146     plane = (U8**)UNF_combin[uv >> 16];
147     if (! plane)
148 	return 0;
149     row = plane[(uv >> 8) & 0xff];
150     return row ? row[uv & 0xff] : 0;
151 }
152 
/*
 * Appends the jamo decomposition of the Hangul syllable uv to sv:
 * leading consonant, vowel and, when present, trailing consonant,
 * each encoded with uvuni_to_utf8().
 * Does nothing when uv is not a Hangul syllable.
 */
static void sv_cat_decompHangul (SV* sv, UV uv)
{
    U8 buf[3 * UTF8_MAXLEN + 1];   /* room for up to three characters + NUL */
    U8 *end = buf;
    UV sindex, lindex, vindex, tindex;

    if (! Hangul_IsS(uv))
        return;

    sindex = uv - Hangul_SBase;
    tindex = sindex % Hangul_TCount;
    vindex = (sindex % Hangul_NCount) / Hangul_TCount;
    lindex = sindex / Hangul_NCount;

    end = uvuni_to_utf8(end, (lindex + Hangul_LBase));
    end = uvuni_to_utf8(end, (vindex + Hangul_VBase));
    if (tindex != 0)    /* tindex 0 means "no trailing consonant" */
        end = uvuni_to_utf8(end, (tindex + Hangul_TBase));
    *end = '\0';

    sv_catpvn(sv, (char *)buf, end - buf);
}
174 
/*
 * Appends the single code point uv to sv, encoded with uvuni_to_utf8().
 */
static void sv_cat_uvuni (SV* sv, UV uv)
{
    U8 buf[UTF8_MAXLEN + 1];   /* one encoded character + NUL */
    U8 *end;

    end = uvuni_to_utf8(buf, uv);
    *end = '\0';
    sv_catpvn(sv, (char *)buf, end - buf);
}
184 
/*
 * Returns the string value of sv as UTF-8 and stores its byte length
 * in *lp.
 *
 * When sv already has the UTF8 flag, the returned pointer is sv's own
 * buffer.  Otherwise sv is left untouched: a mortal copy is made,
 * upgraded with sv_utf8_upgrade(), and the copy's buffer is returned.
 */
static char * sv_2pvunicode(SV *sv, STRLEN *lp)
{
    STRLEN nbytes;
    char *str;

    str = (char*)SvPV(sv, nbytes);

    if (!SvUTF8(sv)) {
        SV *upgraded = sv_mortalcopy(sv);

        if (!SvPOK(upgraded))
            (void)sv_pvn_force(upgraded, &nbytes);
        sv_utf8_upgrade(upgraded);
        str = (char*)SvPV(upgraded, nbytes);
    }

    *lp = nbytes;
    return str;
}
200 
201 MODULE = Unicode::Normalize	PACKAGE = Unicode::Normalize
202 
# decompose(src, compat = false)
#
# Builds and returns a new UTF-8 string by walking src one code point at a
# time.  Hangul syllables are expanded by sv_cat_decompHangul(); a code point
# with a table entry (dec_compat() when compat is true, dec_canonical()
# otherwise) is replaced by that entry; anything else is copied unchanged.
# No reordering of combining characters is done here.
#
# Croaks with ErrRetlenIsZero when the decoder reports a zero-length
# character.  The partially built result is released first, because it is
# not mortal at that point and would otherwise leak.
SV*
decompose(src, compat = &PL_sv_no)
    SV * src
    SV * compat
  PROTOTYPE: $;$
  PREINIT:
    SV *dst;
    STRLEN srclen, retlen;
    U8 *s, *e, *p, *r;
    UV uv;
    bool iscompat;
  CODE:
    iscompat = SvTRUE(compat);
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    dst = newSV(1);
    (void)SvPOK_only(dst);
    SvUTF8_on(dst);

    for (p = s; p < e; p += retlen) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
        if (!retlen) {
            /* dst only becomes mortal when returned: free it before dying */
            SvREFCNT_dec(dst);
            croak(ErrRetlenIsZero);
        }

        if (Hangul_IsS(uv)) {
            sv_cat_decompHangul(dst, uv);
        }
        else {
            r = iscompat ? dec_compat(uv) : dec_canonical(uv);
            if (r)
                sv_catpv(dst, (char *)r);   /* table entry is NUL-terminated */
            else
                sv_cat_uvuni(dst, uv);      /* no mapping: copy as is */
        }
    }
    RETVAL = dst;
  OUTPUT:
    RETVAL
241 
242 
243 
# reorder(src)
#
# Returns a new UTF-8 string holding the code points of src with every run
# of characters whose combining class is non-zero sorted by combining class.
# Characters of equal class keep their relative order (see compare_cc()).
# Characters of class 0 are copied through and delimit the runs.
#
# Croaks with ErrRetlenIsZero when the decoder reports a zero-length
# character.  The scratch array and the unfinished result are released
# before croaking so that the panic path does not leak them.
SV*
reorder(src)
    SV * src
  PROTOTYPE: $
  PREINIT:
    SV *dst;
    STRLEN srclen, dstlen, retlen, stk_cc_max;
    U8 *s, *e, *p, *d, curCC;
    UV uv, uvlast;
    UNF_cc * stk_cc;
    STRLEN i, cc_pos;
    bool valid_uvlast;
  CODE:
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    dstlen = srclen + 1;
    dst = newSV(dstlen);
    (void)SvPOK_only(dst);
    SvUTF8_on(dst);
    d = (U8*)SvPVX(dst);

    stk_cc_max = 10; /* initial capacity; doubled whenever it runs out */
    New(0, stk_cc, stk_cc_max, UNF_cc);

    for (p = s; p < e;) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
        if (!retlen) {
            Safefree(stk_cc);
            SvREFCNT_dec(dst);
            croak(ErrRetlenIsZero);
        }
        p += retlen;

        curCC = getCombinClass(uv);
        if (curCC == 0) {
            /* class 0: nothing to reorder, copy straight through */
            d = uvuni_to_utf8(d, uv);
            continue;
        }

        /* first member of a run of combining characters */
        cc_pos = 0;
        stk_cc[cc_pos].cc  = curCC;
        stk_cc[cc_pos].uv  = uv;
        stk_cc[cc_pos].pos = cc_pos;

        /* collect the rest of the run, up to the next class-0 character */
        valid_uvlast = FALSE;
        while (p < e) {
            uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
            if (!retlen) {
                Safefree(stk_cc);
                SvREFCNT_dec(dst);
                croak(ErrRetlenIsZero);
            }
            p += retlen;

            curCC = getCombinClass(uv);
            if (curCC == 0) {
                /* remember the terminator; it is written after the run */
                uvlast = uv;
                valid_uvlast = TRUE;
                break;
            }

            cc_pos++;
            if (stk_cc_max <= cc_pos) { /* extend if need */
                /* cc_pos grows by one per step, so doubling always covers it */
                stk_cc_max *= 2;
                Renew(stk_cc, stk_cc_max, UNF_cc);
            }
            stk_cc[cc_pos].cc  = curCC;
            stk_cc[cc_pos].uv  = uv;
            stk_cc[cc_pos].pos = cc_pos;
        }

        /* reordered if there are two c.c.'s */
        if (cc_pos) {
            qsort((void*)stk_cc, cc_pos + 1, sizeof(UNF_cc), compare_cc);
        }

        for (i = 0; i <= cc_pos; i++) {
            d = uvuni_to_utf8(d, stk_cc[i].uv);
        }
        if (valid_uvlast) {
            d = uvuni_to_utf8(d, uvlast);
        }
    }
    *d = '\0';
    SvCUR_set(dst, d - (U8*)SvPVX(dst));
    Safefree(stk_cc);
    RETVAL = dst;
  OUTPUT:
    RETVAL
329 
330 
331 
# compose(src)            (ix == 0)
# composeContiguous(src)  (ix == 1)
#
# Returns a new UTF-8 string in which each starter (combining class 0) is
# combined, via composite_uv(), with the characters that follow it.  A
# composite listed by isExclusion() is never produced.
#   - compose:           a following character may combine when its class
#                        is not lower than that of the last uncomposed
#                        character kept since the starter.
#   - composeContiguous: a following character may combine only while no
#                        uncomposed character has been kept since the
#                        starter.
# Characters that do not combine are written after the (possibly composed)
# starter, in their original order.  Characters before the first starter
# are copied through unchanged.
#
# Croaks with ErrRetlenIsZero when the decoder reports a zero-length
# character; the unfinished result is released first so it does not leak.
SV*
compose(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    composeContiguous = 1
  PREINIT:
    SV  *dst, *tmp;
    U8  *s, *p, *e, *d, *t, *tmp_start, curCC, preCC;
    /* uv starts at 0 so that "uvS = uv" below never reads an indeterminate
       value when the inner loop has not run (input ends on the starter) */
    UV uv = 0, uvS, uvComp;
    STRLEN srclen, dstlen, tmplen, retlen;
    bool beginning = TRUE;
  CODE:
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    dstlen = srclen + 1;
    dst = newSV(dstlen);
    (void)SvPOK_only(dst);
    SvUTF8_on(dst);
    d = (U8*)SvPVX(dst);

    /* for uncomposed combining char */
    tmp = sv_2mortal(newSV(dstlen));
    (void)SvPOK_only(tmp);
    SvUTF8_on(tmp);

    for (p = s; p < e;) {
        if (beginning) {
            uvS = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
            if (!retlen) {
                SvREFCNT_dec(dst);
                croak(ErrRetlenIsZero);
            }
            p += retlen;

            if (getCombinClass(uvS)) { /* no Starter found yet */
                d = uvuni_to_utf8(d, uvS);
                continue;
            }
            beginning = FALSE;
        }

        /* Starter: uvS holds it; tmp collects what does not compose */
        t = tmp_start = (U8*)SvPVX(tmp);
        preCC = 0;

        /* to the next Starter */
        while (p < e) {
            uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
            if (!retlen) {
                SvREFCNT_dec(dst);
                croak(ErrRetlenIsZero);
            }
            p += retlen;

            curCC = getCombinClass(uv);

            if (preCC && preCC == curCC) {
                /* same class as the last kept character: blocked */
                preCC = curCC;
                t = uvuni_to_utf8(t, uv);
            } else {
                uvComp = composite_uv(uvS, uv);

                if (uvComp && ! isExclusion(uvComp) &&
                        (ix ? (t == tmp_start) : (preCC <= curCC))) {
                    STRLEN leftcur, rightcur, dstcur;
                    leftcur  = UNISKIP(uvComp);
                    rightcur = UNISKIP(uvS) + UNISKIP(uv);

                    /* the composite may need more bytes than its parts */
                    if (leftcur > rightcur) {
                        dstcur = d - (U8*)SvPVX(dst);
                        dstlen += leftcur - rightcur;
                        d = (U8*)SvGROW(dst,dstlen) + dstcur;
                    }
                    /* preCC not changed to curCC */
                    uvS = uvComp;
                } else if (! curCC && p < e) { /* blocked */
                    /* next starter, with input left: it opens a new group */
                    break;
                } else {
                    preCC = curCC;
                    t = uvuni_to_utf8(t, uv);
                }
            }
        }
        d = uvuni_to_utf8(d, uvS); /* starter (composed or not) */
        tmplen = t - tmp_start;
        if (tmplen) { /* uncomposed combining char */
            t = (U8*)SvPVX(tmp);
            while (tmplen--)
                *d++ = *t++;
        }
        /* only used when the inner loop stopped on a new starter */
        uvS = uv;
    } /* for */
    *d = '\0';
    SvCUR_set(dst, d - (U8*)SvPVX(dst));
    RETVAL = dst;
  OUTPUT:
    RETVAL
427 
428 
# checkNFD(src)   (ix == 0, uses dec_canonical)
# checkNFKD(src)  (ix == 1, uses dec_compat)
#
# Returns YES when no character of src breaks the decomposed form, NO
# otherwise.  A character breaks it when its non-zero combining class is
# lower than that of the preceding character, when it is a Hangul syllable,
# or when the selected decomposition table has an entry for it.
# Croaks with ErrRetlenIsZero on a zero-length character.
void
checkNFD(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    checkNFKD = 1
  PREINIT:
    STRLEN srclen, retlen;
    U8 *s, *e, *p, curCC, preCC;
    UV uv;
  CODE:
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    preCC = 0;
    p = s;
    while (p < e) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
        if (!retlen)
            croak(ErrRetlenIsZero);
        p += retlen;

        curCC = getCombinClass(uv);

        /* canonical ordering violated */
        if (curCC != 0 && curCC < preCC) {
            XSRETURN_NO;
        }

        /* the character itself is decomposable */
        if (Hangul_IsS(uv)) {
            XSRETURN_NO;
        }
        if (ix ? dec_compat(uv) : dec_canonical(uv)) {
            XSRETURN_NO;
        }

        preCC = curCC;
    }
    XSRETURN_YES;
457 
458 
459 
# checkNFC(src)   (ix == 0)
# checkNFKC(src)  (ix == 1)
#
# Quick check of src against the composed form.  Returns NO as soon as a
# character breaks canonical ordering, is reported by isExclusion(),
# isSingleton() or isNonStDecomp(), or (checkNFKC only) has a dec_compat()
# entry that differs from its dec_canonical() entry.  Returns undef
# ("maybe") when none of that happened but some character is reported by
# isComp2nd(); returns YES otherwise.  Hangul syllables always pass.
# Croaks with ErrRetlenIsZero on a zero-length character.
void
checkNFC(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    checkNFKC = 1
  PREINIT:
    STRLEN srclen, retlen;
    U8 *s, *e, *p, curCC, preCC;
    UV uv;
    bool isMAYBE;
  CODE:
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    isMAYBE = FALSE;
    preCC = 0;
    p = s;
    while (p < e) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
        if (!retlen)
            croak(ErrRetlenIsZero);
        p += retlen;

        curCC = getCombinClass(uv);

        /* canonical ordering violated */
        if (curCC != 0 && curCC < preCC) {
            XSRETURN_NO;
        }

        /* get NFC/NFKC property; Hangul syllables are canonical composites */
        if (! Hangul_IsS(uv)) {
            if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) {
                XSRETURN_NO;
            }

            if (isComp2nd(uv)) {
                isMAYBE = TRUE;
            }
            else if (ix) {
                /* NFKC_NO when having compatibility mapping. */
                char *canon  = (char *) dec_canonical(uv);
                char *compat = (char *) dec_compat(uv);

                if (compat && (!canon || strNE(canon, compat))) {
                    XSRETURN_NO;
                }
            }
        } /* end of get NFC/NFKC property */

        preCC = curCC;
    }

    if (isMAYBE) {
        XSRETURN_UNDEF;
    }
    XSRETURN_YES;
509 
510 
511 
# checkFCD(src)  (ix == 0)
# checkFCC(src)  (ix == 1)
#
# For every character the combining class of the FIRST code point of its
# dec_canonical() entry (or of the character itself when it has none) is
# compared with the class of the LAST code point of the previous
# character's entry.  Returns NO when a non-zero leading class is lower than
# the previous trailing class.
# checkFCC additionally returns NO for characters reported by isExclusion(),
# isSingleton() or isNonStDecomp(), and returns undef ("maybe") at the end
# when some character was reported by isComp2nd().  Otherwise returns YES.
# Croaks with ErrRetlenIsZero on a zero-length character and with
# ErrHopBeforeStart when stepping back inside a table entry leaves it.
void
checkFCD(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    checkFCC = 1
  PREINIT:
    STRLEN srclen, retlen, canlen, canret;
    U8 *s, *e, *p, curCC, preCC;
    UV uv, uvLead, uvTrail;
    U8 *sCan, *pCan, *eCan;
    bool isMAYBE;
  CODE:
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    isMAYBE = FALSE;
    preCC = 0;
    p = s;
    while (p < e) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
        if (!retlen)
            croak(ErrRetlenIsZero);
        p += retlen;

        /* leading code point: first of the canonical entry, else uv itself */
        sCan = (U8*) dec_canonical(uv);
        if (!sCan) {
            uvLead = uv;
        }
        else {
            canlen = (STRLEN)strlen((char *) sCan);
            uvLead = utf8n_to_uvuni(sCan, canlen, &canret, AllowAnyUTF);
        }

        curCC = getCombinClass(uvLead);

        /* canonical ordering violated */
        if (curCC != 0 && curCC < preCC) {
            XSRETURN_NO;
        }

        if (ix) {
            if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) {
                XSRETURN_NO;
            }
            if (isComp2nd(uv))
                isMAYBE = TRUE;
        }

        /* trailing class: last code point of the entry, else curCC */
        if (!sCan) {
            preCC = curCC;
        }
        else {
            eCan = sCan + canlen;
            pCan = utf8_hop(eCan, -1);
            if (pCan < sCan)
                croak(ErrHopBeforeStart);
            uvTrail = utf8n_to_uvuni(pCan, eCan - pCan, &canret, AllowAnyUTF);
            preCC = getCombinClass(uvTrail);
        }
    }

    if (isMAYBE) {
        XSRETURN_UNDEF;
    }
    XSRETURN_YES;
573 
574 
575 
# The five XSUBs below have no CODE section, so each one is bound directly
# to the C function of the same name and returns its result.
# getCombinClass() is the static helper defined in the C part of this file.
# The other four predicates are not defined in this file (presumably they
# come from the headers prepared by mkheader).
U8
getCombinClass(uv)
    UV uv
  PROTOTYPE: $

# True when isExclusion() reports uv.
bool
isExclusion(uv)
    UV uv
  PROTOTYPE: $

# True when isSingleton() reports uv.
bool
isSingleton(uv)
    UV uv
  PROTOTYPE: $

# True when isNonStDecomp() reports uv.
bool
isNonStDecomp(uv)
    UV uv
  PROTOTYPE: $

# True when isComp2nd() reports uv.  isNFC_MAYBE and isNFKC_MAYBE are
# additional Perl-level names for this same XSUB; the alias index is not
# consulted, so all three names behave identically.
bool
isComp2nd(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFC_MAYBE  = 1
    isNFKC_MAYBE = 2
603 
604 
605 
# isNFD_NO(uv)   (ix == 0, uses dec_canonical)
# isNFKD_NO(uv)  (ix == 1, uses dec_compat)
#
# Returns YES when uv is a Hangul syllable or has an entry in the selected
# decomposition table, NO otherwise.
void
isNFD_NO(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFKD_NO = 1
  CODE:
    if (!Hangul_IsS(uv) && !(ix ? dec_compat(uv) : dec_canonical(uv))) {
        XSRETURN_NO;
    }
    XSRETURN_YES; /* NFD_NO or NFKD_NO */
617 
618 
619 
# isComp_Ex(uv), isNFC_NO(uv)  (ix == 0)
# isNFKC_NO(uv)                (ix == 1)
#
# Returns YES when uv is reported by isExclusion(), isSingleton() or
# isNonStDecomp().  isNFKC_NO also returns YES when uv has a dec_compat()
# entry that is absent from, or different in, dec_canonical().
# Returns NO in every other case.
void
isComp_Ex(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFC_NO  = 0
    isNFKC_NO = 1
  CODE:
    if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) {
        XSRETURN_YES; /* NFC_NO or NFKC_NO */
    }

    if (ix) {
        char *canon  = (char *) dec_canonical(uv);
        char *compat = (char *) dec_compat(uv);

        /* a compatibility mapping that is not just the canonical one */
        if (compat && !(canon && strEQ(canon, compat))) {
            XSRETURN_YES; /* NFC_NO or NFKC_NO */
        }
    }

    XSRETURN_NO;
641 
642 
643 
# getComposite(uv, uv2)
#
# Returns the code point that composite_uv() gives for the pair (uv, uv2)
# as an unsigned integer, or undef when composite_uv() returns 0.
SV*
getComposite(uv, uv2)
    UV uv
    UV uv2
  PROTOTYPE: $$
  PREINIT:
    UV composite;
  CODE:
    composite = composite_uv(uv, uv2);
    if (composite)
        RETVAL = newSVuv(composite);
    else
        RETVAL = &PL_sv_undef;
  OUTPUT:
    RETVAL
656 
657 
658 
# getCanon(uv)   (ix == 0, uses dec_canonical)
# getCompat(uv)  (ix == 1, uses dec_compat)
#
# Returns the decomposition of uv as a UTF-8 string: the jamo sequence for a
# Hangul syllable, otherwise the entry of the selected table.  Returns undef
# when uv is not a Hangul syllable and the table has no entry for it.
SV*
getCanon(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    getCompat = 1
  PREINIT:
    U8 * rstr;
  CODE:
    if (! Hangul_IsS(uv)) {
        rstr = ix ? dec_compat(uv) : dec_canonical(uv);
        if (!rstr) {
            XSRETURN_UNDEF;
        }
        RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr));
    }
    else {
        /* Hangul is decomposed arithmetically, not from the tables */
        RETVAL = newSV(1);
        (void)SvPOK_only(RETVAL);
        sv_cat_decompHangul(RETVAL, uv);
    }
    SvUTF8_on(RETVAL);
  OUTPUT:
    RETVAL
683 
684 
# splitOnLastStarter(src)
#
# Returns two UTF-8 strings: the part of src before its last character of
# combining class 0, and the part from that character to the end.  When src
# contains no such character the first string is empty and the second holds
# all of src.
# Croaks with ErrHopBeforeStart when stepping back one character would
# leave the string.
void
splitOnLastStarter(src)
    SV * src
  PREINIT:
    SV *svp;
    STRLEN srclen, retlen;
    U8 *s, *e, *p;
    UV uv;
  PPCODE:
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    /* scan backwards, one character at a time, from the end */
    p = e;
    while (s < p) {
        p = utf8_hop(p, -1);
        if (p < s)
            croak(ErrHopBeforeStart);
        uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
        if (getCombinClass(uv) == 0) { /* Last Starter found */
            break;
        }
    }

    /* everything before the split point */
    svp = sv_2mortal(newSVpvn((char*)s, p - s));
    SvUTF8_on(svp);
    XPUSHs(svp);

    /* the split point and everything after it */
    svp = sv_2mortal(newSVpvn((char*)p, e - p));
    SvUTF8_on(svp);
    XPUSHs(svp);
713 
714