Another (and hopefully last) major streams commit.

This moves unicode conversion to the filter layer (rather than at the lower streams layer). unicode_filter.c has been moved from ext/unicode to main/streams, as it's an integral part of the streams unicode conversion process.

There are now three ways to set the encoding on a stream:
(1) By context: $ctx = stream_context_create(NULL,array('encoding'=>'latin1')); $fp = fopen('somefile', 'r+t', false, $ctx);
(2) By stream_encoding(): $fp = fopen('somefile', 'r+'); stream_encoding($fp, 'latin1');
(3) By filter: $fp = fopen('somefile', 'r+'); stream_filter_append($fp, 'unicode.from.latin1', STREAM_FILTER_READ); stream_filter_append($fp, 'unicode.to.latin1', STREAM_FILTER_WRITE);

Note: Methods 1 and 2 are convenience wrappers around method 3.
20 years ago · 30a2bd1d11
15 changed files with 276 additions and 239 deletions
--- a/ext/standard/basic_functions.c
+++ b/ext/standard/basic_functions.c
@ -589,6 +589,7 @@ zend_function_entry basic_functions[] = {
 	PHP_FE(stream_filter_prepend,											NULL)
 	PHP_FE(stream_filter_append,											NULL)
 	PHP_FE(stream_filter_remove,											NULL)
+	PHP_FE(stream_encoding,													NULL)
 	PHP_FE(stream_socket_client,				 second_and_third_args_force_ref)
 	PHP_FE(stream_socket_server,				 second_and_third_args_force_ref)
 	PHP_FE(stream_socket_accept,				 		   third_arg_force_ref)
--- a/ext/standard/file.c
+++ b/ext/standard/file.c
@ -1008,14 +1008,14 @@ PHPAPI PHP_FUNCTION(fgets)

 	php_stream_from_zval(stream, &zstream);

-	buf.v = php_stream_get_line_ex(stream, php_stream_reads_unicode(stream) ? IS_UNICODE : IS_STRING, NULL_ZSTR, 0, length, &retlen);
+	buf.v = php_stream_get_line_ex(stream, stream->readbuf_type, NULL_ZSTR, 0, length, &retlen);
 	if (!buf.v) {
 		RETURN_FALSE;
 	}

-	if (php_stream_reads_unicode(stream)) {
+	if (stream->readbuf_type == IS_UNICODE) {
 		RETURN_UNICODEL(buf.u, retlen, 0);
-	} else {
+	} else { /* IS_STRING */
 		RETURN_STRINGL(buf.s, retlen, 0);
 	}
 }
@ -1034,7 +1034,7 @@ PHPAPI PHP_FUNCTION(fgetc)

 	PHP_STREAM_TO_ZVAL(stream, arg1);

-	if (php_stream_reads_unicode(stream)) {
+	if (stream->readbuf_type == IS_UNICODE) {
 		int buflen = 1;
 		UChar *buf = php_stream_read_unicode_chars(stream, &buflen);

@ -1042,7 +1042,7 @@ PHPAPI PHP_FUNCTION(fgetc)
 			RETURN_FALSE;
 		}
 		RETURN_UNICODEL(buf, buflen, 0);
-	} else {
+	} else { /* IS_STRING */
 		char buf[2];

 		buf[0] = php_stream_getc(stream);
@ -1068,7 +1068,7 @@ PHPAPI PHP_FUNCTION(fgetss)

 	php_stream_from_zval(stream, &zstream);

-	if (php_stream_reads_unicode(stream)) {
+	if (stream->readbuf_type == IS_UNICODE) {
 		UChar *buf = php_stream_get_line_ex(stream, IS_UNICODE, NULL_ZSTR, 0, length, &retlen);
 		UChar *allowed = NULL;
 		int allowed_len = 0;
@ -1085,7 +1085,7 @@ PHPAPI PHP_FUNCTION(fgetss)
 		retlen = php_u_strip_tags(buf, retlen, &stream->fgetss_state, allowed, allowed_len TSRMLS_CC);

 		RETURN_UNICODEL(buf, retlen, 0);
-	} else {
+	} else { /* IS_STRING */
 		char *buf = php_stream_get_line_ex(stream, IS_STRING, NULL_ZSTR, 0, length, &retlen);
 		char *allowed = NULL;
 		int allowed_len = 0;
@ -1752,7 +1752,7 @@ PHPAPI PHP_FUNCTION(fread)
 		RETURN_FALSE;
 	}

-	if (php_stream_reads_unicode(stream)) {
+	if (stream->readbuf_type == IS_UNICODE) {
 		int buflen = len;
 		UChar *buf = php_stream_read_unicode_chars(stream, &buflen);

@ -1761,7 +1761,7 @@ PHPAPI PHP_FUNCTION(fread)
 		}

 		RETURN_UNICODEL(buf, buflen, 0);
-	} else {
+	} else { /* IS_STRING */
 		char *buf = emalloc(len + 1);
 		int buflen = php_stream_read(stream, buf, len);

--- a/ext/standard/streamsfuncs.c
+++ b/ext/standard/streamsfuncs.c
@ -489,11 +489,11 @@ PHP_FUNCTION(stream_get_meta_data)
 		add_assoc_zval(return_value, "write_filters", newval);
 	}
 	
-	if (php_stream_reads_unicode(stream)) {
+	if (stream->readbuf_type == IS_UNICODE) {
 		int readbuf_len = u_countChar32(stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos);
 		add_assoc_long(return_value, "unread_bytes", UBYTES(stream->writepos - stream->readpos));
 		add_assoc_long(return_value, "unread_chars", readbuf_len);
-	} else {
+	} else { /* IS_STRING */
 		add_assoc_long(return_value, "unread_bytes", stream->writepos - stream->readpos);
 		add_assoc_long(return_value, "unread_chars", stream->writepos - stream->readpos);
 	}
@ -1275,7 +1275,7 @@ PHP_FUNCTION(stream_get_line)

 	php_stream_from_zval(stream, &zstream);

-	if (php_stream_reads_unicode(stream)) {
+	if (stream->readbuf_type == IS_UNICODE) {
 		UChar *buf;
 		UChar *d = NULL;
 		int dlen = 0;
@ -1294,7 +1294,7 @@ PHP_FUNCTION(stream_get_line)
 		}

 		RETURN_UNICODEL(buf, buf_size, 0);
-	} else {
+	} else { /* IS_STRING */
 		char *buf;
 		char *d = NULL;
 		int dlen = 0;
@ -1462,6 +1462,67 @@ PHP_FUNCTION(stream_socket_enable_crypto)
 }
 /* }}} */

+/* {{{ proto bool stream_encoding(resource stream[, string encoding])
+Set character set for stream encoding.
+Swaps the unicode.from.* / unicode.to.* filter found at the tail of the
+stream's read / write filter chain for one matching the requested charset.
+Returns TRUE on success. Returns FALSE (after a warning) when no encoding is
+given, when another filter sits behind the current conversion, or when one
+of the new conversion filters could not be applied.
+UTODO: Return current encoding charset when encoding is omitted
+*/
+PHP_FUNCTION(stream_encoding)
+{
+	zval *zstream;
+	php_stream *stream;
+	char *encoding = NULL;
+	int encoding_len = 0;
+	int remove_read_tail = 0, remove_write_tail = 0;
+	int write_result, read_result;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "r|s", &zstream, &encoding, &encoding_len) == FAILURE) {
+		return;
+	}
+
+	php_stream_from_zval(stream, &zstream);
+
+	if (!encoding) {
+		/* The argument is optional in the signature, but reporting the current
+		   charset is still a UTODO. php_stream_encoding_apply() runs strlen()
+		   on the name, so it must never be handed NULL. */
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "No encoding specified");
+		RETURN_FALSE;
+	}
+
+	/* Check both filter chains before changing either of them, so a refusal
+	   leaves the stream exactly as it was */
+
+	if (stream->readfilters.tail) {
+		if (stream->readfilters.tail->fops == &php_unicode_from_string_filter_ops) {
+			/* Remove the current unicode.from.* filter,
+			   the filter layer will transcode anything in the read buffer back to binary
+			   or invalidate the read buffer */
+			remove_read_tail = 1;
+		} else if (stream->readbuf_type == IS_UNICODE) {
+			/* There's an encoding on the stream already, but then there's filtering happening after that point.
+			   It's asking too much for PHP to figure out what the user wants, throw an error back in their face */
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot change encoding on filtered stream");
+			RETURN_FALSE;
+		}
+	}
+
+	if (stream->writefilters.tail) {
+		if (stream->writefilters.tail->fops == &php_unicode_to_string_filter_ops) {
+			/* Remove the current unicode.to.* filter */
+			remove_write_tail = 1;
+		} else if ((stream->writefilters.tail->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) == 0) {
+			/* Conversion to binary is happening, then another filter is doing something;
+			   bail out for the same reason as with the read filters */
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Cannot change encoding on filtered stream");
+			RETURN_FALSE;
+		}
+	}
+
+	if (remove_read_tail) {
+		php_stream_filter_remove(stream->readfilters.tail, 1 TSRMLS_CC);
+	}
+	if (remove_write_tail) {
+		php_stream_filter_remove(stream->writefilters.tail, 1 TSRMLS_CC);
+	}
+
+	/* UTODO: Allow overriding error handling for converters */
+	/* Both directions are always attempted (write chain first, then read chain);
+	   a failure in either one is reported to the caller instead of being dropped */
+	write_result = php_stream_encoding_apply(stream, 1, encoding, UG(from_error_mode), UG(from_subst_char));
+	read_result = php_stream_encoding_apply(stream, 0, encoding, UG(to_error_mode), NULL);
+
+	if (write_result == FAILURE || read_result == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	RETURN_TRUE;
+}
+/* }}} */
+
 /*
 * Local variables:
 * tab-width: 4
--- a/ext/standard/streamsfuncs.h
+++ b/ext/standard/streamsfuncs.h
@ -53,6 +53,7 @@ PHP_FUNCTION(stream_context_get_default);
 PHP_FUNCTION(stream_filter_prepend);
 PHP_FUNCTION(stream_filter_append);
 PHP_FUNCTION(stream_filter_remove);
+PHP_FUNCTION(stream_encoding);
 PHP_FUNCTION(stream_socket_enable_crypto);
 PHP_FUNCTION(stream_socket_pair);

--- a/ext/unicode/config.m4
+++ b/ext/unicode/config.m4
@ -4,4 +4,4 @@ dnl

 PHP_SUBST(UNICODE_SHARED_LIBADD)
 AC_DEFINE(HAVE_UNICODE, 1, [ ])
-PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_filter.c unicode_iterators.c collator.c, $ext_shared)
+PHP_NEW_EXTENSION(unicode, unicode.c locale.c unicode_iterators.c collator.c, $ext_shared)
--- a/ext/unicode/config.w32
+++ b/ext/unicode/config.w32
@ -1,5 +1,5 @@
 // $Id$
 // vim:ft=javascript

-EXTENSION("unicode", "unicode.c unicode_filter.c unicode_iterators.c collator.c locale.c");
+EXTENSION("unicode", "unicode.c unicode_iterators.c collator.c locale.c");
 AC_DEFINE('HAVE_UNICODE', 1, 'ICU API extension');
--- a/ext/unicode/php_unicode.h
+++ b/ext/unicode/php_unicode.h
@ -67,7 +67,6 @@ PHP_FUNCTION(collator_get_attribute);
 PHP_METHOD(collator, __construct);

 void php_init_collation(TSRMLS_D);
-extern php_stream_filter_factory php_unicode_filter_factory;

 #ifdef  __cplusplus
 } // extern "C" 
--- a/ext/unicode/unicode.c
+++ b/ext/unicode/unicode.c
@ -273,10 +273,6 @@ ZEND_GET_MODULE(unicode)
 /* {{{ PHP_MINIT_FUNCTION */
 PHP_MINIT_FUNCTION(unicode)
 {
-	if (php_stream_filter_register_factory("unicode.*", &php_unicode_filter_factory TSRMLS_CC) == FAILURE) {
-		return FAILURE;
-	}
-
 	php_register_unicode_iterators(TSRMLS_C);
 	php_init_collation(TSRMLS_C);
 	
@ -287,9 +283,6 @@ PHP_MINIT_FUNCTION(unicode)
 /* {{{ PHP_MSHUTDOWN_FUNCTION */
 PHP_MSHUTDOWN_FUNCTION(unicode)
 {
-	if (php_stream_filter_unregister_factory("unicode.*" TSRMLS_CC) == FAILURE) {
-		return FAILURE;
-	}
 	/* add your stuff here */

  
--- a/main/main.c
+++ b/main/main.c
@ -1611,6 +1611,12 @@ int php_module_startup(sapi_module_struct *sf, zend_module_entry *additional_mod
 		return FAILURE;
 	}

+	/* Initialize unicode filters */
+	if (php_stream_filter_register_factory("unicode.*", &php_unicode_filter_factory TSRMLS_CC) == FAILURE) {
+		php_printf("PHP:  Unable to initialize unicode stream filters.\n");
+		return FAILURE;
+	}
+
 	/* initialize registry for images to be used in phpinfo()
 	   (this uses configuration parameters from php.ini)
 	 */
@ -1744,6 +1750,7 @@ void php_module_shutdown(TSRMLS_D)

 	zend_shutdown(TSRMLS_C);

+	/* Destroys filter & transport registries too */
 	php_shutdown_stream_wrappers(module_number TSRMLS_CC);

 	php_shutdown_info_logos();
--- a/main/php_streams.h
+++ b/main/php_streams.h
@ -206,12 +206,9 @@ struct _php_stream  {
 	php_stream_context *context;
 	int flags;	/* PHP_STREAM_FLAG_XXX */

-	/* unicode */
-	UConverter *input_encoding;
-	UConverter *output_encoding;
-
 	/* buffer */
 	off_t position; /* of underlying stream */
+	zend_uchar readbuf_type;
 	zstr readbuf; /* readbuf.s or readbuf.u */
 	size_t readbuflen; /* Length in units (char or UChar) */
 	off_t readpos; /* Position in units (char or UChar) */
@ -252,8 +249,6 @@ END_EXTERN_C()
 #define php_stream_from_zval_no_verify(xstr, ppzval)	(xstr) = (php_stream*)zend_fetch_resource((ppzval) TSRMLS_CC, -1, "stream", NULL, 2, php_file_le_stream(), php_file_le_pstream())

 #define PS_ULEN(is_unicode, len)	((is_unicode) ? UBYTES(len) : (len))
-#define php_stream_reads_unicode(stream)	((stream->input_encoding) ? 1 : 0)
-#define php_stream_writes_unicode(stream)	((stream->output_encoding) ? 1 : 0)

 BEGIN_EXTERN_C()
 PHPAPI int php_stream_from_persistent_id(const char *persistent_id, php_stream **stream TSRMLS_DC);
--- a/main/streams/filter.c
+++ b/main/streams/filter.c
@ -396,50 +396,63 @@ PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, php_stream
 	chain->tail = filter;
 	filter->chain = chain;

-	if (&(stream->readfilters) == chain && (stream->writepos - stream->readpos) > 0) {
+	if (&(stream->readfilters) == chain) {
 		/* Let's going ahead and wind anything in the buffer through this filter */
 		php_stream_bucket_brigade brig_in = { NULL, NULL }, brig_out = { NULL, NULL };
 		php_stream_bucket_brigade *brig_inp = &brig_in, *brig_outp = &brig_out;
-		php_stream_filter_status_t status;
+		php_stream_filter_status_t status = PSFS_FEED_ME;
 		php_stream_bucket *bucket;
 		size_t consumed = 0;

-		if (stream->input_encoding) {
-			bucket = php_stream_bucket_new_unicode(stream, stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC);
-		} else {
-			bucket = php_stream_bucket_new(stream, stream->readbuf.s + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC);
-		}
-		php_stream_bucket_append(brig_inp, bucket TSRMLS_CC);
-		status = filter->fops->filter(stream, filter, brig_inp, brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC);
+		if ((stream->writepos - stream->readpos) > 0) {
+			if (stream->readbuf_type == IS_UNICODE) {
+				bucket = php_stream_bucket_new_unicode(stream, stream->readbuf.u + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC);
+			} else {
+				bucket = php_stream_bucket_new(stream, stream->readbuf.s + stream->readpos, stream->writepos - stream->readpos, 0, 0 TSRMLS_CC);
+			}
+			php_stream_bucket_append(brig_inp, bucket TSRMLS_CC);
+			status = filter->fops->filter(stream, filter, brig_inp, brig_outp, &consumed, PSFS_FLAG_NORMAL TSRMLS_CC);

-		if (stream->readpos + consumed > stream->writepos || consumed < 0) {
-			/* No behaving filter should cause this. */
-			status = PSFS_ERR_FATAL;
+			if (stream->readpos + consumed > stream->writepos || consumed < 0) {
+				/* No behaving filter should cause this. */
+				status = PSFS_ERR_FATAL;
+			}
 		}

-		switch (status) {
-			case PSFS_ERR_FATAL:
-				/* If this first cycle simply fails then there's something wrong with the filter.
-				   Pull the filter off the chain and leave the read buffer alone. */
-				if (chain->head == filter) {
-					chain->head = NULL;
-					chain->tail = NULL;
-				} else {
-					filter->prev->next = NULL;
-					chain->tail = filter->prev;
-				}
-				php_stream_bucket_unlink(bucket TSRMLS_CC);
-				php_stream_bucket_delref(bucket TSRMLS_CC);
-				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data.  Not adding to filterchain.");
-				break;
-			case PSFS_FEED_ME:
+		if (status == PSFS_ERR_FATAL) {
+			/* If this first cycle simply fails then there's something wrong with the filter.
+			   Pull the filter off the chain and leave the read buffer alone. */
+			if (chain->head == filter) {
+				chain->head = NULL;
+				chain->tail = NULL;
+			} else {
+				filter->prev->next = NULL;
+				chain->tail = filter->prev;
+			}
+			php_stream_bucket_unlink(bucket TSRMLS_CC);
+			php_stream_bucket_delref(bucket TSRMLS_CC);
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Filter failed to process pre-buffered data.  Not adding to filterchain.");
+		} else {
+			/* This filter addition may change the readbuffer type.
+			   Since all the previously held data is in the bucket brigade,
+			   we can reappropriate the buffer that already exists (if one does) */
+			if (stream->readbuf_type == IS_UNICODE && (filter->fops->flags & PSFO_FLAG_OUTPUTS_UNICODE) == 0) {
+				/* Buffer is currently based on unicode characters, but filter only outputs STRING adjust counting */
+				stream->readbuf_type = IS_STRING;
+				stream->readbuflen *= UBYTES(1);
+			} else if (stream->readbuf_type == IS_STRING && (filter->fops->flags & PSFO_FLAG_OUTPUTS_STRING) == 0) {
+				/* Buffer is currently based on binary characters, but filter only outputs UNICODE adjust counting */
+				stream->readbuf_type = IS_UNICODE;
+				stream->readbuflen /= UBYTES(1);
+			}
+
+			if (status == PSFS_FEED_ME) {
 				/* We don't actually need data yet,
 				   leave this filter in a feed me state until data is needed. 
 				   Reset stream's internal read buffer since the filter is "holding" it. */
 				stream->readpos = 0;
 				stream->writepos = 0;
-				break;
-			case PSFS_PASS_ON:
+			} else if (status == PSFS_PASS_ON) {
 				/* Put any filtered data onto the readbuffer stack.
 				   Previously read data has been at least partially consumed. */
 				stream->readpos += consumed;
@ -454,23 +467,20 @@ PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, php_stream
 					bucket = brig_outp->head;

 					/* Convert for stream type */
-					if (bucket->buf_type != IS_UNICODE && stream->input_encoding) {
-						/* Stream expects unicode, convert using stream encoding */
-						php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding);
-					} else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) {
-						/* Stream expects binary, filter provided unicode, just take the buffer as is */
-						php_stream_bucket_convert_notranscode(bucket, IS_STRING);
+					if (bucket->buf_type != stream->readbuf_type) {
+						/* Stream expects different type than bucket contains, convert sloppily */
+						php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type);
 					}

 					/* Grow buffer to hold this bucket if need be.
 					   TODO: See warning in main/stream/streams.c::php_stream_fill_read_buffer */
 					if (stream->readbuflen - stream->writepos < bucket->buflen) {
 						stream->readbuflen += bucket->buflen;
-						stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, stream->readbuflen), stream->is_persistent);
+						stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readbuflen), stream->is_persistent);
 					}

 					/* Append to readbuf */
-					if (stream->input_encoding) {
+					if (stream->readbuf_type == IS_UNICODE) {
 						memcpy(stream->readbuf.u + stream->writepos, bucket->buf.u, UBYTES(bucket->buflen));
 					} else {
 						memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, bucket->buflen);
@ -480,10 +490,9 @@ PHPAPI void _php_stream_filter_append(php_stream_filter_chain *chain, php_stream
 					php_stream_bucket_unlink(bucket TSRMLS_CC);
 					php_stream_bucket_delref(bucket TSRMLS_CC);
 				}
-				break;
+			}
 		}
-		
-	}
+	} /* end of readfilters specific code */
 }

 PHPAPI int _php_stream_filter_check_chain(php_stream_filter_chain *chain TSRMLS_DC)
@ -597,26 +606,23 @@ PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS
 		/* Dump any newly flushed data to the read buffer */
 		if (stream->readpos > stream->chunk_size) {
 			/* Back the buffer up */
-			memcpy(stream->readbuf.s, stream->readbuf.s + PS_ULEN(stream->input_encoding, stream->readpos), PS_ULEN(stream->input_encoding, stream->writepos - stream->readpos));
+			memcpy(stream->readbuf.s, stream->readbuf.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readpos), PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos - stream->readpos));
 			stream->writepos -= stream->readpos;
 			stream->readpos = 0;
 		}
 		if (flushed_size > (stream->readbuflen - stream->writepos)) {
 			/* Grow the buffer */
-			stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, stream->writepos + flushed_size + stream->chunk_size), stream->is_persistent);
+			stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos + flushed_size + stream->chunk_size), stream->is_persistent);
 		}
 		while ((bucket = inp->head)) {
 			/* Convert if necessary */
-			if (bucket->buf_type != IS_UNICODE && stream->input_encoding) {
-				/* Stream expects unicode, convert using stream encoding */
-				php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding);
-			} else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) {
-				/* Stream expects binary, filter provided unicode, just take the buffer as is */
-				php_stream_bucket_convert_notranscode(bucket, IS_STRING);
+			if (bucket->buf_type != stream->readbuf_type) {
+				/* Stream expects different type than what's in bucket, convert sloppily */
+				php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type);
 			}

 			/* Append to readbuf */
-			if (stream->input_encoding) {
+			if (stream->readbuf_type == IS_UNICODE) {
 				 memcpy(stream->readbuf.u + stream->writepos, bucket->buf.u, UBYTES(bucket->buflen));
 			} else {
 				 memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, bucket->buflen);
@ -632,13 +638,8 @@ PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS
 		while ((bucket = inp->head)) {
 			/* Convert if necessary */
 			if (bucket->buf_type == IS_UNICODE) {
-				if (stream->output_encoding) {
-					/* Stream has a configured output encoding, convert to appropriate type */
-					php_stream_bucket_convert(bucket, IS_STRING, stream->output_encoding);
-				} else {
-					/* Stream is binary, write ugly UChars as is */
-					php_stream_bucket_convert_notranscode(bucket, IS_STRING);
-				}
+				/* Force data to binary, adjusting buflen */
+				php_stream_bucket_convert_notranscode(bucket, IS_STRING);
 			}

 			/* Must be binary by this point */
@ -654,6 +655,9 @@ PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS

 PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, int call_dtor TSRMLS_DC)
 {
+	/* UTODO: Figure out a sane way to "defilter" so that unicode converters can be swapped around
+	   For now, at least fopen(,'b') + stream_encoding($fp, 'charset') works since there's nothing to remove */
+
 	if (filter->prev) {
 		filter->prev->next = filter->next;
 	} else {
@ -770,6 +774,42 @@ PHPAPI int _php_stream_bucket_convert(php_stream_bucket *bucket, unsigned char t
 	return FAILURE;
 }

+/* Create a "unicode.to.<encoding>" filter (writechain != 0) or a
+ * "unicode.from.<encoding>" filter (writechain == 0) and append it to the
+ * stream's write or read filter chain respectively.
+ *
+ * error_mode is passed to the filter as its "error_mode" parameter; subst,
+ * when non-NULL, is passed as "subst_char".
+ *
+ * Returns SUCCESS once the filter has been handed to the chain, or FAILURE
+ * (after raising a warning) when encoding is NULL or no filter could be
+ * created for it. */
+PHPAPI int _php_stream_encoding_apply(php_stream *stream, int writechain, const char *encoding, uint16_t error_mode, UChar *subst TSRMLS_DC)
+{
+	const char *prefix = writechain ? "unicode.to." : "unicode.from.";
+	size_t prefix_len, encoding_len;
+	char *filtername;
+	php_stream_filter *filter;
+	zval *filterparams;
+
+	if (!encoding) {
+		/* strlen(NULL) below would crash; refuse the request instead */
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to apply encoding: no charset given");
+		return FAILURE;
+	}
+
+	/* filtername = prefix + encoding, copying encoding's terminating NUL as well */
+	prefix_len = strlen(prefix);
+	encoding_len = strlen(encoding);
+	filtername = emalloc(prefix_len + encoding_len + 1);
+	memcpy(filtername, prefix, prefix_len);
+	memcpy(filtername + prefix_len, encoding, encoding_len + 1);
+
+	ALLOC_INIT_ZVAL(filterparams);
+	array_init(filterparams);
+	add_assoc_long(filterparams, "error_mode", error_mode);
+	if (subst) {
+		add_assoc_unicode(filterparams, "subst_char", subst, 1);
+	}
+	filter = php_stream_filter_create(filtername, filterparams, php_stream_is_persistent(stream) TSRMLS_CC);
+	efree(filtername);
+	zval_ptr_dtor(&filterparams);
+
+	if (!filter) {
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to apply encoding for charset: %s\n", encoding);
+		return FAILURE;
+	}
+
+	php_stream_filter_append(writechain ? &stream->writefilters : &stream->readfilters, filter);
+
+	return SUCCESS;
+}
+
+
 /*
 * Local variables:
 * tab-width: 4
--- a/main/streams/php_stream_filter_api.h
+++ b/main/streams/php_stream_filter_api.h
@ -157,6 +157,7 @@ PHPAPI int _php_stream_filter_flush(php_stream_filter *filter, int finish TSRMLS
 PHPAPI php_stream_filter *php_stream_filter_remove(php_stream_filter *filter, int call_dtor TSRMLS_DC);
 PHPAPI void php_stream_filter_free(php_stream_filter *filter TSRMLS_DC);
 PHPAPI php_stream_filter *_php_stream_filter_alloc(php_stream_filter_ops *fops, void *abstract, int persistent STREAMS_DC TSRMLS_DC);
+PHPAPI int _php_stream_encoding_apply(php_stream *stream, int writechain, const char *encoding, uint16_t error_mode, UChar *subst TSRMLS_DC);
 END_EXTERN_C()
 #define php_stream_filter_alloc(fops, thisptr, persistent) _php_stream_filter_alloc((fops), (thisptr), (persistent) STREAMS_CC TSRMLS_CC)
 #define php_stream_filter_alloc_rel(fops, thisptr, persistent) _php_stream_filter_alloc((fops), (thisptr), (persistent) STREAMS_REL_CC TSRMLS_CC)
@ -165,6 +166,8 @@ END_EXTERN_C()
 #define php_stream_filter_flush(filter, finish) _php_stream_filter_flush((filter), (finish) TSRMLS_CC)
 #define php_stream_filter_check_chain(chain) _php_stream_filter_check_chain((chain) TSRMLS_CC)
 #define php_stream_filter_output_prefer_unicode(filter) _php_stream_filter_output_prefer_unicode((filter) TSRMLS_CC)
+#define php_stream_encoding_apply(stream, writechain, encoding, error_mode, subst) \
+		_php_stream_encoding_apply((stream), (writechain), (encoding), (error_mode), (subst) TSRMLS_CC)

 #define php_stream_is_filtered(stream)	((stream)->readfilters.head || (stream)->writefilters.head)

@ -179,6 +182,12 @@ PHPAPI int php_stream_filter_register_factory_volatile(const char *filterpattern
 PHPAPI php_stream_filter *php_stream_filter_create(const char *filtername, zval *filterparams, int persistent TSRMLS_DC);
 END_EXTERN_C()

+/* unicode_filter.c exports */
+extern php_stream_filter_ops php_unicode_to_string_filter_ops;
+extern php_stream_filter_ops php_unicode_from_string_filter_ops;
+extern php_stream_filter_ops php_unicode_tidy_filter_ops;
+extern php_stream_filter_factory php_unicode_filter_factory;
+
 /*
 * Local variables:
 * tab-width: 4
--- a/main/streams/streams.c
+++ b/main/streams/streams.c
@ -239,6 +239,7 @@ fprintf(stderr, "stream_alloc: %s:%p persistent=%s\n", ops->label, ret, persiste
 	ret->abstract = abstract;
 	ret->is_persistent = persistent_id ? 1 : 0;
 	ret->chunk_size = FG(def_chunk_size);
+	ret->readbuf_type = IS_STRING;

 	if (FG(auto_detect_line_endings)) {
 		ret->flags |= PHP_STREAM_FLAG_DETECT_EOL;
@ -483,12 +484,9 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D
 					 * stream read buffer */
 					while (brig_inp->head) {
 						bucket = brig_inp->head;
-						if (bucket->buf_type != IS_UNICODE && stream->input_encoding) {
-							/* Stream expects unicode, convert using stream encoding */
-							php_stream_bucket_convert(bucket, IS_UNICODE, stream->input_encoding);
-						} else if (bucket->buf_type == IS_UNICODE && !stream->input_encoding) {
-							/* Stream expects binary, filter provided unicode, just take the buffer as is */
-							php_stream_bucket_convert_notranscode(bucket, IS_STRING);
+						if (bucket->buf_type != stream->readbuf_type) {
+							/* Stream expects different datatype than bucket has, convert sloppily */
+							php_stream_bucket_convert_notranscode(bucket, stream->readbuf_type);
 						}
 						/* Bucket type now matches stream type */

@ -496,9 +494,9 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D
 						 * TODO: this can fail for persistent streams */
 						if (stream->readbuflen - stream->writepos < bucket->buflen) {
 							stream->readbuflen += bucket->buflen;
-							stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->input_encoding, stream->readbuflen), stream->is_persistent);
+							stream->readbuf.v = perealloc(stream->readbuf.v, PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readbuflen), stream->is_persistent);
 						}
-						memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, PS_ULEN(stream->input_encoding, bucket->buflen));
+						memcpy(stream->readbuf.s + stream->writepos, bucket->buf.s, PS_ULEN(stream->readbuf_type == IS_UNICODE, bucket->buflen));
 						stream->writepos += bucket->buflen;

 						php_stream_bucket_unlink(bucket TSRMLS_CC);
@ -530,46 +528,6 @@ static void php_stream_fill_read_buffer(php_stream *stream, size_t size TSRMLS_D
 		}

 		efree(chunk_buf);
-	} else if (stream->input_encoding) { /* Unfiltered Unicode stream */
-		/* is there enough data in the buffer ? */
-		if (stream->writepos - stream->readpos < (off_t)size) {
-			char *binbuf;
-			UChar *ubuf;
-			int binbuf_len, ubuf_len;
-			size_t toread = (size > stream->chunk_size) ? size : stream->chunk_size;
-			UErrorCode status = U_ZERO_ERROR;
-
-			/* Read stream data into temporary buffer, then convert to unicode
-			   TODO: This can be improved */
-			binbuf = emalloc(toread + 1);
-			binbuf_len = stream->ops->read(stream, binbuf, toread TSRMLS_CC);
-			if (binbuf_len == (size_t)-1) {
-				/* Failure */
-				efree(binbuf);
-				return;
-			}
-			/* Convert to unicode */
-			zend_convert_to_unicode(stream->input_encoding, &ubuf, &ubuf_len, binbuf, binbuf_len, &status);
-			efree(binbuf);
-
-			/* reduce buffer memory consumption if possible, to avoid a realloc */
-			if (stream->readbuf.u && stream->readbuflen - stream->writepos < stream->chunk_size) {
-				memmove(stream->readbuf.u, stream->readbuf.u + stream->readpos, UBYTES(stream->readbuflen - stream->readpos));
-				stream->writepos -= stream->readpos;
-				stream->readpos = 0;
-			}
-
-			/* grow the buffer if required
-			 * TODO: this can fail for persistent streams */
-			if (stream->readbuflen - stream->writepos < ubuf_len) {
-				stream->readbuflen += ((stream->chunk_size > ubuf_len) ? stream->chunk_size : ubuf_len);
-				stream->readbuf.u = (UChar*)perealloc(stream->readbuf.u, UBYTES(stream->readbuflen), stream->is_persistent);
-			}
-
-			memcpy(stream->readbuf.u + stream->writepos, ubuf, UBYTES(ubuf_len));
-			efree(ubuf);
-			stream->writepos += ubuf_len;
-		}
 	} else {	/* Unfiltered Binary stream */
 		/* is there enough data in the buffer ? */
 		if (stream->writepos - stream->readpos < (off_t)size) {
@ -609,13 +567,13 @@ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS
 		 * drain the remainder of the buffer before using the "raw" read mode for
 		 * the excess */
 		if (stream->writepos - stream->readpos > 0) {
-			toread = PS_ULEN(stream->input_encoding, stream->writepos - stream->readpos);
+			toread = PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->writepos - stream->readpos);

 			if (toread > size) {
 				toread = size;
 			}

-			if (stream->input_encoding) {
+			if (stream->readbuf_type == IS_UNICODE) {
 				/* Sloppy read, anyone using php_stream_read() on a unicode stream
 				 * had better know what they're doing */
 				
@ -647,7 +605,7 @@ PHPAPI size_t _php_stream_read(php_stream *stream, char *buf, size_t size TSRMLS
 			}

 			if (toread > 0) {
-				if (php_stream_reads_unicode(stream)) {
+				if (stream->readbuf_type == IS_UNICODE) {
 					/* Sloppy read, anyone using php_stream_read() on a unicode stream
 					 * had better know what they're doing */
 				
@ -685,7 +643,7 @@ PHPAPI size_t _php_stream_read_unicode(php_stream *stream, UChar *buf, int size,
 {
 	size_t toread = 0, didread = 0, string_length = 0;

-	if (!stream->input_encoding) {
+	if (stream->readbuf_type != IS_UNICODE) {
 		return -1;
 	}

@ -763,7 +721,7 @@ PHPAPI UChar *_php_stream_read_unicode_chars(php_stream *stream, int *pchars TSR
 	int buflen = size;
 	size_t toread = 0, didread = 0, string_length = 0;

-	if (!stream->input_encoding) {
+	if (stream->readbuf_type != IS_UNICODE) {
 		return NULL;
 	}

@ -921,7 +879,7 @@ PHPAPI void *php_stream_locate_eol(php_stream *stream, zstr zbuf, int buf_len TS
 	char *readptr, *buf = zbuf.s;

 	if (!buf) {
-		readptr = stream->readbuf.s + PS_ULEN(stream->input_encoding, stream->readpos);
+		readptr = stream->readbuf.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, stream->readpos);
 		avail = stream->writepos - stream->readpos;
 	} else {
 		readptr = zbuf.s;
@ -929,7 +887,7 @@ PHPAPI void *php_stream_locate_eol(php_stream *stream, zstr zbuf, int buf_len TS
 	}

 	if (stream->flags & PHP_STREAM_FLAG_DETECT_EOL) {
-		if (stream->input_encoding) {
+		if (stream->readbuf_type == IS_UNICODE) {
 			cr = (char*)u_memchr((UChar*)readptr, '\r', avail);
 			lf = (char*)u_memchr((UChar*)readptr, '\n', avail);
 		} else {
@ -948,10 +906,10 @@ PHPAPI void *php_stream_locate_eol(php_stream *stream, zstr zbuf, int buf_len TS
 			eol = lf;
 		}
 	} else if (stream->flags & PHP_STREAM_FLAG_EOL_MAC) {
-		eol = stream->input_encoding ? u_memchr((UChar*)readptr, '\r', avail) : memchr(readptr, '\r', avail);
+		eol = (stream->readbuf_type == IS_UNICODE) ? u_memchr((UChar*)readptr, '\r', avail) : memchr(readptr, '\r', avail);
 	} else {
 		/* unix (and dos) line endings */
-		eol = stream->input_encoding ? u_memchr((UChar*)readptr, '\n', avail) : memchr(readptr, '\n', avail);
+		eol = (stream->readbuf_type == IS_UNICODE) ? u_memchr((UChar*)readptr, '\n', avail) : memchr(readptr, '\n', avail);
 	}

 	return (void*)eol;
@ -967,7 +925,7 @@ PHPAPI void *_php_stream_get_line(php_stream *stream, int buf_type, zstr buf, si
 	size_t current_buf_size = 0;
 	size_t total_copied = 0;
 	int grow_mode = 0;
-	int is_unicode = php_stream_reads_unicode(stream);
+	int is_unicode = stream->readbuf_type == IS_UNICODE;
 	int split_surrogate = 0;
 	zstr bufstart = buf;

@ -1042,8 +1000,8 @@ PHPAPI void *_php_stream_get_line(php_stream *stream, int buf_type, zstr buf, si
 				 * than 8K, we waste 1 byte per additional 8K or so.
 				 * That seems acceptable to me, to avoid making this code
 				 * hard to follow */
-				bufstart.s = erealloc(bufstart.s, PS_ULEN(stream->input_encoding, current_buf_size + cpysz + 1));
-				buf.s = bufstart.s + PS_ULEN(stream->input_encoding, total_copied);
+				bufstart.s = erealloc(bufstart.s, PS_ULEN(stream->readbuf_type == IS_UNICODE, current_buf_size + cpysz + 1));
+				buf.s = bufstart.s + PS_ULEN(stream->readbuf_type == IS_UNICODE, total_copied);
 				current_buf_size += cpysz + 1;
 			} else {
 				if (cpysz >= maxlen - 1) {
@ -1177,7 +1135,7 @@ PHPAPI UChar *php_stream_get_record_unicode(php_stream *stream, size_t maxlen, s
 	size_t toread;
 	int skip = 0;

-	if (!php_stream_reads_unicode(stream)) {
+	if (stream->readbuf_type != IS_UNICODE) {
 		return NULL;
 	}

@ -1241,8 +1199,7 @@ PHPAPI UChar *php_stream_get_record_unicode(php_stream *stream, size_t maxlen, s
 /* Writes a buffer directly to a stream, using multiple of the chunk size */
 static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr buf, int buflen TSRMLS_DC)
 {
-	size_t didwrite = 0, towrite, justwrote, shouldwrite, buflen_orig = buflen;
-	zstr buf_orig = buf;
+	size_t didwrite = 0, towrite, justwrote, shouldwrite;
 	char *freeme = NULL;

 	/* if we have a seekable stream we need to ensure that data is written at the
@ -1254,24 +1211,9 @@ static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr bu
 		stream->ops->seek(stream, stream->position, SEEK_SET, &stream->position TSRMLS_CC);
 	}

-	if (stream->output_encoding && buf_type == IS_UNICODE) {
-		char *dest;
-		int destlen, num_conv;
-		UErrorCode status = U_ZERO_ERROR;
-
-		num_conv = zend_convert_from_unicode(stream->output_encoding, &dest, &destlen, buf.u, buflen, &status);
-		if (U_FAILURE(status)) {
-			int32_t offset = u_countChar32(buf.u, num_conv);
-
-			zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", stream->output_encoding, ZEND_FROM_UNICODE, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
-		}
-		freeme = buf.s = dest;
-		buflen = destlen;
-	} else {
-		/* Sloppy handling, make it a binary buffer */
-		if (buf_type != IS_STRING) {
-			buflen = UBYTES(buflen);
-		}
+	/* Sloppy handling, make it a binary buffer */
+	if (buf_type == IS_UNICODE) {
+		buflen = UBYTES(buflen);
 	}

 	shouldwrite = buflen;
@ -1300,32 +1242,7 @@ static size_t _php_stream_write_buffer(php_stream *stream, int buf_type, zstr bu
 		}
 	}

-
-	if (stream->output_encoding) {
-		/* Map didwrite back to the original character count */
-		if (didwrite == shouldwrite) {
-			/* Everything wrote okay, no need to count */
-			didwrite = buflen_orig;
-		} else {
-			UErrorCode status = U_ZERO_ERROR;
-			char *t = freeme;
-			const UChar *p = buf_orig.u;
-
-			switch (ucnv_getType(stream->output_encoding)) {
-				case UCNV_SBCS:
-				case UCNV_LATIN_1:
-				case UCNV_US_ASCII:
-					/* 1:1 character->byte mapping, didwrite really does mean the number of characters written */
-					break;
-				default:
-					/* Reconvert into junk buffer to see where conversion stops in source string */
-					ucnv_resetFromUnicode(stream->output_encoding);
-					ucnv_fromUnicode(stream->output_encoding, &t, t + didwrite, &p, p + buflen_orig, NULL, TRUE, &status);
-					/* p stops at the first unconvertable UChar when t runs out of space */
-					didwrite = p - buf_orig.u;
-			}
-		}
-	} else if (buf_type == IS_UNICODE) {
+	if (buf_type == IS_UNICODE) {
 		/* Was slopily converted */
 		didwrite /= UBYTES(1);
 	}
@ -2274,50 +2191,15 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio
 	if (stream && strchr(implicit_mode, 't') && UG(unicode)) {
 		if (strchr(implicit_mode, 'w') || strchr(implicit_mode, 'a') || strchr(implicit_mode, '+')) {
 			char *encoding = (context && context->output_encoding) ? context->output_encoding : "utf8";
-			UErrorCode status = U_ZERO_ERROR;
-
-			stream->output_encoding = ucnv_open(encoding, &status);
-			if (U_FAILURE(status)) {
-				switch (status) {
-					case U_MEMORY_ALLOCATION_ERROR:
-						php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-							"Unable to allocate memory for unicode output converter: %s", encoding);
-						break;
-					case U_FILE_ACCESS_ERROR:
-						php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-							"Error loading unicode output converter: %s", encoding);
-						break;
-					default:
-						php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-							"Unknown error starting unicode output converter: %s", encoding);
-				}
-			} else {
-				/* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */
-				zend_set_converter_error_mode(stream->output_encoding, ZEND_FROM_UNICODE, UG(from_error_mode));
-				zend_set_converter_subst_char(stream->output_encoding, UG(from_subst_char));
-			}
+
+			/* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */
+			php_stream_encoding_apply(stream, 1, encoding, UG(from_error_mode), UG(from_subst_char));
 		}
 		if (strchr(implicit_mode, 'r') || strchr(implicit_mode, '+')) {
 			char *encoding = (context && context->input_encoding) ? context->input_encoding : "utf8";
-			UErrorCode status = U_ZERO_ERROR;
-
-			stream->input_encoding = ucnv_open(encoding, &status);
-			if (U_FAILURE(status)) {
-				switch (status) {
-					case U_MEMORY_ALLOCATION_ERROR:
-						php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-							"Unable to allocate memory for unicode input converter: %s", encoding);
-						break;
-					case U_FILE_ACCESS_ERROR:
-						php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-							"Error loading unicode input converter: %s", encoding);
-						break;
-					default:
-						php_stream_wrapper_log_error(wrapper, options ^ REPORT_ERRORS TSRMLS_CC,
-							"Unknown error starting unicode input converter: %s", encoding);
-				}
-			}
-			/* UTODO: If/When Input error handling gets implemented, set the options on success */
+
+			/* UTODO: (Maybe?) Allow overriding the default error handlers on a per-stream basis via context params */
+			php_stream_encoding_apply(stream, 0, encoding, UG(to_error_mode), NULL);
 		}
 	}

@ -2334,6 +2216,7 @@ PHPAPI php_stream *_php_stream_open_wrapper_ex(char *path, char *mode, int optio
 		pefree(copy_of_path, persistent);
 	}
 #endif
+
 	return stream;
 }
 /* }}} */
--- a/main/streams/unicode_filter.c
+++ b/main/streams/unicode_filter.c
@ -74,6 +74,7 @@ static php_stream_filter_status_t php_unicode_to_string_filter(
 			destp = destbuf = (char *)pemalloc(destlen, data->is_persistent);

 			ucnv_fromUnicode(data->conv, &destp, destbuf + destlen, (const UChar**)&src, src + remaining, NULL, FALSE, &errCode);
+			/* UTODO: Error catching */
 			new_bucket = php_stream_bucket_new(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
 			php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
 			exit_status = PSFS_PASS_ON;
@ -88,6 +89,7 @@ static php_stream_filter_status_t php_unicode_to_string_filter(
 		/* Spit it out! */

 		ucnv_fromUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode);
+		/* UTODO: Error catching */
 		if (dest > d) {
 			php_stream_bucket *bucket = php_stream_bucket_new(stream, d, dest - d, 0, 0 TSRMLS_CC);
 			php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
@ -145,6 +147,7 @@ static php_stream_filter_status_t php_unicode_from_string_filter(
 			destp = destbuf = (UChar *)pemalloc(destlen, data->is_persistent);

 			ucnv_toUnicode(data->conv, &destp, (UChar*)((char*)destbuf + destlen), (const char**)&src, src + remaining, NULL, FALSE, &errCode);
+			/* UTODO: Error catching */

 			new_bucket = php_stream_bucket_new_unicode(stream, destbuf, destp - destbuf, 1, data->is_persistent TSRMLS_CC);
 			php_stream_bucket_append(buckets_out, new_bucket TSRMLS_CC);
@ -160,6 +163,7 @@ static php_stream_filter_status_t php_unicode_from_string_filter(
 		/* Spit it out! */

 		ucnv_toUnicode(data->conv, &dest, destp, NULL, NULL, NULL, TRUE, &errCode);
+		/* UTODO: Error catching */
 		if (dest > d) {
 			php_stream_bucket *bucket = php_stream_bucket_new_unicode(stream, d, dest - d, 0, 0 TSRMLS_CC);
 			php_stream_bucket_append(buckets_out, bucket TSRMLS_CC);
@ -220,21 +224,21 @@ static void php_unicode_filter_dtor(php_stream_filter *thisfilter TSRMLS_DC)
 	}
 }

-static php_stream_filter_ops php_unicode_to_string_filter_ops = {
+php_stream_filter_ops php_unicode_to_string_filter_ops = {
 	php_unicode_to_string_filter,
 	php_unicode_filter_dtor,
 	"unicode.to.*",
 	PSFO_FLAG_ACCEPTS_UNICODE | PSFO_FLAG_OUTPUTS_STRING
 };

-static php_stream_filter_ops php_unicode_from_string_filter_ops = {
+php_stream_filter_ops php_unicode_from_string_filter_ops = {
 	php_unicode_from_string_filter,
 	php_unicode_filter_dtor,
 	"unicode.from.*",
 	PSFO_FLAG_ACCEPTS_STRING | PSFO_FLAG_OUTPUTS_UNICODE
 };

-static php_stream_filter_ops php_unicode_tidy_filter_ops = {
+php_stream_filter_ops php_unicode_tidy_filter_ops = {
 	php_unicode_tidy_filter,
 	php_unicode_filter_dtor,
 	"unicode.tidy.*",
@ -251,7 +255,10 @@ static php_stream_filter *php_unicode_filter_create(const char *filtername, zval
 	const char *charset, *direction;
 	php_stream_filter_ops *fops;
 	UErrorCode ucnvError = U_ZERO_ERROR;
+	/* Note: from_error_mode governs conversion from unicode to a charset, whereas a "unicode.from.*" filter converts from a charset to unicode and therefore uses to_error_mode instead */
+	uint16_t err_mode = UG(from_error_mode);
 	char to_unicode = 0;
+	zval **tmpzval;

 	if (strncasecmp(filtername, "unicode.", sizeof("unicode.") - 1)) {
 		/* Never happens */
@ -264,8 +271,9 @@ static php_stream_filter *php_unicode_filter_create(const char *filtername, zval
 		charset = direction + sizeof("to.") - 1;
 	} else if (strncmp(direction, "from.", sizeof("from.") - 1) == 0) {
 		fops = &php_unicode_from_string_filter_ops;
-		to_unicode = 1;
 		charset = direction + sizeof("from.") - 1;
+		to_unicode = 1;
+		err_mode = UG(to_error_mode);
 	} else if (strncmp(direction, "tidy.", sizeof("tidy.") - 1) == 0) {
 		fops = &php_unicode_tidy_filter_ops;
 		charset = direction + sizeof("tidy.") - 1;
@ -303,6 +311,46 @@ static php_stream_filter *php_unicode_filter_create(const char *filtername, zval
 		return NULL;
 	}

+	if (filterparams &&
+		Z_TYPE_P(filterparams) == IS_ARRAY &&
+		zend_hash_find(Z_ARRVAL_P(filterparams), "error_mode", sizeof("error_mode"), (void**)&tmpzval) == SUCCESS &&
+		tmpzval && *tmpzval) {
+		if (Z_TYPE_PP(tmpzval) == IS_LONG) {
+			err_mode = Z_LVAL_PP(tmpzval);
+		} else {
+			zval copyval = **tmpzval;
+			zval_copy_ctor(&copyval);
+			convert_to_long(&copyval);
+			err_mode = Z_LVAL(copyval);
+		}
+	}
+
+	zend_set_converter_error_mode(data->conv, to_unicode ? ZEND_TO_UNICODE : ZEND_FROM_UNICODE, err_mode);
+	if (!to_unicode) {
+		UChar *freeme = NULL;
+		UChar *subst_char = UG(from_subst_char);
+
+		if (filterparams &&
+			Z_TYPE_P(filterparams) == IS_ARRAY &&
+			zend_hash_find(Z_ARRVAL_P(filterparams), "subst_char", sizeof("subst_char"), (void**)&tmpzval) == SUCCESS &&
+			tmpzval && *tmpzval) {
+			if (Z_TYPE_PP(tmpzval) == IS_UNICODE) {
+				subst_char = Z_USTRVAL_PP(tmpzval);
+			} else {
+				zval copyval = **tmpzval;
+				zval_copy_ctor(&copyval);
+				convert_to_unicode(&copyval);
+				subst_char = freeme = Z_USTRVAL(copyval);
+			}
+		}
+
+		zend_set_converter_subst_char(data->conv, subst_char);
+
+		if (freeme) {
+			efree(freeme);
+		}
+	}
+
 	return php_stream_filter_alloc(fops, data, persistent);
 }

--- a/win32/build/config.w32
+++ b/win32/build/config.w32
@ -279,7 +279,7 @@ ADD_SOURCES("main", "main.c snprintf.c spprintf.c fopen_wrappers.c \
 	php_open_temporary_file.c php_logos.c output.c internal_functions.c php_sprintf.c");

 ADD_SOURCES("main/streams", "streams.c cast.c memory.c filter.c plain_wrapper.c \
-	userspace.c transports.c xp_socket.c mmap.c");
+	userspace.c transports.c xp_socket.c mmap.c unicode_filter.c");

 ADD_SOURCES("win32", "crypt_win32.c flock.c glob.c md5crypt.c pwd.c readdir.c \
 	registry.c select.c sendmail.c time.c wfile.c winutil.c wsyslog.c globals.c");