summaryrefslogtreecommitdiff
path: root/includes/utils/AvroValidator.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/utils/AvroValidator.php')
-rw-r--r--includes/utils/AvroValidator.php184
1 files changed, 184 insertions, 0 deletions
diff --git a/includes/utils/AvroValidator.php b/includes/utils/AvroValidator.php
new file mode 100644
index 00000000..4f8e0b17
--- /dev/null
+++ b/includes/utils/AvroValidator.php
@@ -0,0 +1,184 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Generate error strings for data that doesn't match the specified
+ * Avro schema. This is very similar to AvroSchema::is_valid_datum(),
+ * but returns error messages instead of a boolean.
+ *
+ * @since 1.26
+ * @author Erik Bernhardson <ebernhardson@wikimedia.org>
+ * @copyright © 2015 Erik Bernhardson and Wikimedia Foundation.
+ */
+class AvroValidator {
+ /**
+ * @param AvroSchema $schema The rules to conform to.
+ * @param mixed $datum The value to validate against $schema.
+ * @return string|string[] An error or list of errors in the
+ * provided $datum. When no errors exist the empty array is
+ * returned.
+ */
+ public static function getErrors( AvroSchema $schema, $datum ) {
+ switch ( $schema->type) {
+ case AvroSchema::NULL_TYPE:
+ if ( !is_null($datum) ) {
+ return self::wrongType( 'null', $datum );
+ }
+ return array();
+ case AvroSchema::BOOLEAN_TYPE:
+ if ( !is_bool($datum) ) {
+ return self::wrongType( 'boolean', $datum );
+ }
+ return array();
+ case AvroSchema::STRING_TYPE:
+ case AvroSchema::BYTES_TYPE:
+ if ( !is_string($datum) ) {
+ return self::wrongType( 'string', $datum );
+ }
+ return array();
+ case AvroSchema::INT_TYPE:
+ if ( !is_int($datum) ) {
+ return self::wrongType( 'integer', $datum );
+ }
+ if ( AvroSchema::INT_MIN_VALUE > $datum
+ || $datum > AvroSchema::INT_MAX_VALUE
+ ) {
+ return self::outOfRange(
+ AvroSchema::INT_MIN_VALUE,
+ AvroSchema::INT_MAX_VALUE,
+ $datum
+ );
+ }
+ return array();
+ case AvroSchema::LONG_TYPE:
+ if ( !is_int($datum) ) {
+ return self::wrongType( 'integer', $datum );
+ }
+ if ( AvroSchema::LONG_MIN_VALUE > $datum
+ || $datum > AvroSchema::LONG_MAX_VALUE
+ ) {
+ return self::outOfRange(
+ AvroSchema::LONG_MIN_VALUE,
+ AvroSchema::LONG_MAX_VALUE,
+ $datum
+ );
+ }
+ return array();
+ case AvroSchema::FLOAT_TYPE:
+ case AvroSchema::DOUBLE_TYPE:
+ if ( !is_float($datum) && !is_int($datum) ) {
+ return self::wrongType( 'float or integer', $datum );
+ }
+ return array();
+ case AvroSchema::ARRAY_SCHEMA:
+ if (!is_array($datum)) {
+ return self::wrongType( 'array', $datum );
+ }
+ $errors = array();
+ foreach ($datum as $d) {
+ $result = $this->validate( $schema->items(), $d );
+ if ( $result ) {
+ $errors[] = $result;
+ }
+ }
+ if ( $errors ) {
+ return $errors;
+ }
+ return array();
+ case AvroSchema::MAP_SCHEMA:
+ if (!is_array($datum)) {
+ return self::wrongType( 'array', $datum );
+ }
+ $errors = array();
+ foreach ($datum as $k => $v) {
+ if ( !is_string($k) ) {
+ $errors[] = self::wrongType( 'string key', $k );
+ }
+ $result = self::getErrors( $schema->values(), $v );
+ if ( $result ) {
+ $errors[$k] = $result;
+ }
+ }
+ return $errors;
+ case AvroSchema::UNION_SCHEMA:
+ $errors = array();
+ foreach ($schema->schemas() as $schema) {
+ $result = self::getErrors( $schema, $datum );
+ if ( !$result ) {
+ return array();
+ }
+ $errors[] = $result;
+ }
+ if ( $errors ) {
+ return array( "Expected any one of these to be true", $errors );
+ }
+ return "No schemas provided to union";
+ case AvroSchema::ENUM_SCHEMA:
+ if ( !in_array( $datum, $schema->symbols() ) ) {
+ $symbols = implode( ', ', $schema->symbols );
+ return "Expected one of $symbols but recieved $datum";
+ }
+ return array();
+ case AvroSchema::FIXED_SCHEMA:
+ if ( !is_string( $datum ) ) {
+ return self::wrongType( 'string', $datum );
+ }
+ $len = strlen( $datum );
+ if ( $len !== $schema->size() ) {
+ return "Expected string of length {$schema->size()}, "
+ . "but recieved one of length $len";
+ }
+ return array();
+ case AvroSchema::RECORD_SCHEMA:
+ case AvroSchema::ERROR_SCHEMA:
+ case AvroSchema::REQUEST_SCHEMA:
+ if ( !is_array( $datum ) ) {
+ return self::wrongType( 'array', $datum );
+ }
+ $errors = array();
+ foreach ( $schema->fields() as $field ) {
+ $name = $field->name();
+ if ( !array_key_exists( $name, $datum ) ) {
+ $errors[$name] = 'Missing expected field';
+ continue;
+ }
+ $result = self::getErrors( $field->type(), $datum[$name] );
+ if ( $result ) {
+ $errors[$name] = $result;
+ }
+ }
+ return $errors;
+ default:
+ return "Unknown avro schema type: {$schema->type}";
+ }
+ }
+
+ public static function typeOf( $datum ) {
+ return is_object( $datum ) ? get_class( $datum ) : gettype( $datum );
+ }
+
+ public static function wrongType( $expected, $datum ) {
+ return "Expected $expected, but recieved " . self::typeOf( $datum );
+ }
+
+ public static function outOfRange( $min, $max, $datum ) {
+ return "Expected value between $min and $max, but recieved $datum";
+ }
+}