View Javadoc
1   /**
2    * Copyright (C) 2014-2016 Philip Helger (www.helger.com)
3    * philip[at]helger[dot]com
4    *
5    * Licensed under the Apache License, Version 2.0 (the "License");
6    * you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    *         http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package com.helger.schematron.pure;
18  
19  import java.io.File;
20  import java.io.InputStream;
21  import java.net.MalformedURLException;
22  import java.net.URL;
23  import java.nio.charset.Charset;
24  
25  import javax.annotation.Nonnull;
26  import javax.annotation.Nullable;
27  import javax.annotation.concurrent.NotThreadSafe;
28  import javax.xml.transform.Source;
29  import javax.xml.xpath.XPathFunctionResolver;
30  import javax.xml.xpath.XPathVariableResolver;
31  
32  import org.oclc.purl.dsdl.svrl.SchematronOutputType;
33  import org.w3c.dom.Document;
34  import org.w3c.dom.Node;
35  
36  import com.helger.commons.ValueEnforcer;
37  import com.helger.commons.annotation.Nonempty;
38  import com.helger.commons.charset.CharsetManager;
39  import com.helger.commons.io.IHasInputStream;
40  import com.helger.commons.io.resource.ClassPathResource;
41  import com.helger.commons.io.resource.FileSystemResource;
42  import com.helger.commons.io.resource.IReadableResource;
43  import com.helger.commons.io.resource.URLResource;
44  import com.helger.commons.io.resource.inmemory.AbstractMemoryReadableResource;
45  import com.helger.commons.io.resource.inmemory.ReadableResourceByteArray;
46  import com.helger.commons.io.resource.inmemory.ReadableResourceInputStream;
47  import com.helger.commons.state.EValidity;
48  import com.helger.schematron.AbstractSchematronResource;
49  import com.helger.schematron.SchematronException;
50  import com.helger.schematron.SchematronResourceHelper;
51  import com.helger.schematron.pure.bound.IPSBoundSchema;
52  import com.helger.schematron.pure.bound.PSBoundSchemaCache;
53  import com.helger.schematron.pure.bound.PSBoundSchemaCacheKey;
54  import com.helger.schematron.pure.errorhandler.DoNothingPSErrorHandler;
55  import com.helger.schematron.pure.errorhandler.IPSErrorHandler;
56  import com.helger.schematron.pure.exchange.PSWriter;
57  import com.helger.schematron.pure.model.PSSchema;
58  import com.helger.schematron.svrl.SVRLWriter;
59  import com.helger.xml.serialize.read.DOMReader;
60  import com.helger.xml.serialize.write.XMLWriterSettings;
61  
62  /**
63   * A Schematron resource that is not XSLT based but using the pure (native Java)
64   * implementation. This class itself is not thread safe, but the underlying
65   * cache is thread safe. So once you have configured this object fully (with
66   * all the setters), it can be considered thread safe.<br>
67   * <b>Important:</b> This class can <u>only</u> handle XPath expressions but no
68   * XSLT functions in Schematron asserts and reports!
69   *
70   * @author Philip Helger
71   */
72  @NotThreadSafe
73  public class SchematronResourcePure extends AbstractSchematronResource
74  {
75    private String m_sPhase;
76    private IPSErrorHandler m_aErrorHandler;
77    private XPathVariableResolver m_aVariableResolver;
78    private XPathFunctionResolver m_aFunctionResolver;
79    // Status var
80    private IPSBoundSchema m_aBoundSchema;
81  
82    public SchematronResourcePure (@Nonnull final IReadableResource aResource)
83    {
84      this (aResource, (String) null, (IPSErrorHandler) null);
85    }
86  
87    public SchematronResourcePure (@Nonnull final IReadableResource aResource,
88                                   @Nullable final String sPhase,
89                                   @Nullable final IPSErrorHandler aErrorHandler)
90    {
91      super (aResource);
92      setPhase (sPhase);
93      setErrorHandler (aErrorHandler);
94    }
95  
96    /**
97     * @return The phase to be used. May be <code>null</code>.
98     */
99    @Nullable
100   public String getPhase ()
101   {
102     return m_sPhase;
103   }
104 
105   /**
106    * Set the Schematron phase to be evaluated. Changing the phase will result in
107    * a newly bound schema!
108    *
109    * @param sPhase
110    *        The name of the phase to use. May be <code>null</code> which means
111    *        all phases.
112    * @return this
113    */
114   @Nonnull
115   public SchematronResourcePure setPhase (@Nullable final String sPhase)
116   {
117     if (m_aBoundSchema != null)
118       throw new IllegalStateException ("Schematron was already bound and can therefore not be altered!");
119     m_sPhase = sPhase;
120     return this;
121   }
122 
123   /**
124    * @return The error handler to be used to bind the schema. May be
125    *         <code>null</code>.
126    */
127   @Nullable
128   public IPSErrorHandler getErrorHandler ()
129   {
130     return m_aErrorHandler;
131   }
132 
133   /**
134    * Set the error handler to be used during binding.
135    *
136    * @param aErrorHandler
137    *        The error handler. May be <code>null</code>.
138    * @return this
139    */
140   @Nonnull
141   public SchematronResourcePure setErrorHandler (@Nullable final IPSErrorHandler aErrorHandler)
142   {
143     if (m_aBoundSchema != null)
144       throw new IllegalStateException ("Schematron was already bound and can therefore not be altered!");
145     m_aErrorHandler = aErrorHandler;
146     return this;
147   }
148 
149   /**
150    * @return The variable resolver to be used. May be <code>null</code>.
151    */
152   @Nullable
153   public XPathVariableResolver getVariableResolver ()
154   {
155     return m_aVariableResolver;
156   }
157 
158   /**
159    * Set the variable resolver to be used in the XPath statements. This can only
160    * be set before the Schematron is bound. If it is already bound an exception
161    * is thrown to indicate the unnecessity of the call.
162    *
163    * @param aVariableResolver
164    *        The variable resolver to set. May be <code>null</code>.
165    * @return this
166    */
167   @Nonnull
168   public SchematronResourcePure setVariableResolver (@Nullable final XPathVariableResolver aVariableResolver)
169   {
170     if (m_aBoundSchema != null)
171       throw new IllegalStateException ("Schematron was already bound and can therefore not be altered!");
172     m_aVariableResolver = aVariableResolver;
173     return this;
174   }
175 
176   /**
177    * @return The function resolver to be used. May be <code>null</code>.
178    */
179   @Nullable
180   public XPathFunctionResolver getFunctionResolver ()
181   {
182     return m_aFunctionResolver;
183   }
184 
185   /**
186    * Set the function resolver to be used in the XPath statements. This can only
187    * be set before the Schematron is bound. If it is already bound an exception
188    * is thrown to indicate the unnecessity of the call.
189    *
190    * @param aFunctionResolver
191    *        The function resolver to set. May be <code>null</code>.
192    * @return this
193    */
194   @Nonnull
195   public SchematronResourcePure setFunctionResolver (@Nullable final XPathFunctionResolver aFunctionResolver)
196   {
197     if (m_aBoundSchema != null)
198       throw new IllegalStateException ("Schematron was already bound and can therefore not be altered!");
199     m_aFunctionResolver = aFunctionResolver;
200     return this;
201   }
202 
203   @Nonnull
204   protected IPSBoundSchema createBoundSchema ()
205   {
206     final IReadableResource aResource = getResource ();
207     final IPSErrorHandler aErrorHandler = getErrorHandler ();
208     final PSBoundSchemaCacheKey aCacheKey = new PSBoundSchemaCacheKey (aResource,
209                                                                        getPhase (),
210                                                                        aErrorHandler,
211                                                                        getVariableResolver (),
212                                                                        getFunctionResolver ());
213     if (aResource instanceof AbstractMemoryReadableResource)
214     {
215       // No need to cache anything for memory resources
216       try
217       {
218         return aCacheKey.createBoundSchema ();
219       }
220       catch (final SchematronException ex)
221       {
222         // Convert to runtime exception
223         throw new IllegalStateException ("Failed to bind Schematron", ex);
224       }
225     }
226 
227     // Resolve from cache - inside the cacheKey the reading and binding
228     // happens
229     return PSBoundSchemaCache.getInstance ().getFromCache (aCacheKey);
230   }
231 
232   @Nonnull
233   protected IPSBoundSchema getOrCreateBoundSchema ()
234   {
235     if (m_aBoundSchema == null)
236       try
237       {
238         m_aBoundSchema = createBoundSchema ();
239       }
240       catch (final RuntimeException ex)
241       {
242         if (m_aErrorHandler != null)
243           m_aErrorHandler.error (getResource (), null, "Error creating bound schema", ex);
244         throw ex;
245       }
246 
247     return m_aBoundSchema;
248   }
249 
250   public boolean isValidSchematron ()
251   {
252     // Use the provided error handler (if any)
253     try
254     {
255       final IPSErrorHandler aErrorHandler = m_aErrorHandler != null ? m_aErrorHandler : new DoNothingPSErrorHandler ();
256       return getOrCreateBoundSchema ().getOriginalSchema ().isValid (aErrorHandler);
257     }
258     catch (final RuntimeException ex)
259     {
260       // May happen when XPath errors are contained
261       return false;
262     }
263   }
264 
265   /**
266    * Use the internal error handler to validate all elements in the schematron.
267    * It tries to catch as many errors as possible.
268    */
269   public void validateCompletely ()
270   {
271     // Use the provided error handler (if any)
272     final IPSErrorHandler aErrorHandler = m_aErrorHandler != null ? m_aErrorHandler : new DoNothingPSErrorHandler ();
273     validateCompletely (aErrorHandler);
274   }
275 
276   /**
277    * Use the provided error handler to validate all elements in the schematron.
278    * It tries to catch as many errors as possible.
279    *
280    * @param aErrorHandler
281    *        The error handler to use. May not be <code>null</code>.
282    */
283   public void validateCompletely (@Nonnull final IPSErrorHandler aErrorHandler)
284   {
285     ValueEnforcer.notNull (aErrorHandler, "ErrorHandler");
286 
287     try
288     {
289       getOrCreateBoundSchema ().getOriginalSchema ().validateCompletely (aErrorHandler);
290     }
291     catch (final RuntimeException ex)
292     {
293       // May happen when XPath errors are contained
294     }
295   }
296 
297   /**
298    * The main method to convert a node to an SVRL document.
299    *
300    * @param aXMLNode
301    *        The source node to be validated. May not be <code>null</code>.
302    * @return The SVRL document. Never <code>null</code>.
303    * @throws SchematronException
304    *         in case of a sever error validating the schema
305    */
306   @Nonnull
307   public SchematronOutputType applySchematronValidationToSVRL (@Nonnull final Node aXMLNode) throws SchematronException
308   {
309     return getOrCreateBoundSchema ().validateComplete (aXMLNode);
310   }
311 
312   @Nonnull
313   public EValidity getSchematronValidity (@Nonnull final IHasInputStream aXMLResource) throws Exception
314   {
315     if (!isValidSchematron ())
316       return EValidity.INVALID;
317 
318     final Document aDoc = DOMReader.readXMLDOM (aXMLResource.getInputStream ());
319     if (aDoc == null)
320       throw new IllegalArgumentException ("Failed to read resource " + aXMLResource + " as XML");
321 
322     return getOrCreateBoundSchema ().validatePartially (aDoc);
323   }
324 
325   @Nonnull
326   public EValidity getSchematronValidity (@Nonnull final Source aXMLSource) throws Exception
327   {
328     if (!isValidSchematron ())
329       return EValidity.INVALID;
330 
331     // Convert Source to Node
332     final Node aNode = SchematronResourceHelper.getNodeOfSource (aXMLSource);
333     if (aNode == null)
334       return EValidity.INVALID;
335 
336     return getOrCreateBoundSchema ().validatePartially (aNode);
337   }
338 
339   @Nullable
340   public Document applySchematronValidation (@Nonnull final IHasInputStream aXMLResource) throws Exception
341   {
342     final SchematronOutputType aSO = applySchematronValidationToSVRL (aXMLResource);
343     return aSO == null ? null : SVRLWriter.createXML (aSO);
344   }
345 
346   @Nullable
347   public Document applySchematronValidation (@Nonnull final Source aXMLSource) throws Exception
348   {
349     final SchematronOutputType aSO = applySchematronValidationToSVRL (aXMLSource);
350     return aSO == null ? null : SVRLWriter.createXML (aSO);
351   }
352 
353   @Nullable
354   public SchematronOutputType applySchematronValidationToSVRL (@Nonnull final IHasInputStream aXMLResource) throws Exception
355   {
356     ValueEnforcer.notNull (aXMLResource, "XMLResource");
357 
358     if (!isValidSchematron ())
359       return null;
360 
361     final InputStream aIS = aXMLResource.getInputStream ();
362     if (aIS == null)
363       return null;
364 
365     final Document aDoc = DOMReader.readXMLDOM (aIS);
366     if (aDoc == null)
367       throw new IllegalArgumentException ("Failed to read resource " + aXMLResource + " as XML");
368 
369     return applySchematronValidationToSVRL (aDoc);
370   }
371 
372   @Nullable
373   public SchematronOutputType applySchematronValidationToSVRL (@Nonnull final Source aXMLSource) throws Exception
374   {
375     ValueEnforcer.notNull (aXMLSource, "XMLSource");
376 
377     if (!isValidSchematron ())
378       return null;
379 
380     // Convert to Node
381     final Node aNode = SchematronResourceHelper.getNodeOfSource (aXMLSource);
382     if (aNode == null)
383       return null;
384 
385     return applySchematronValidationToSVRL (aNode);
386   }
387 
388   /**
389    * Create a new {@link SchematronResourcePure} from a Classpath Schematron
390    * rules
391    *
392    * @param sSCHPath
393    *        The classpath relative path to the Schematron rules.
394    * @return Never <code>null</code>.
395    */
396   @Nonnull
397   public static SchematronResourcePure fromClassPath (@Nonnull @Nonempty final String sSCHPath)
398   {
399     return new SchematronResourcePure (new ClassPathResource (sSCHPath));
400   }
401 
402   /**
403    * Create a new {@link SchematronResourcePure} from file system Schematron
404    * rules
405    *
406    * @param sSCHPath
407    *        The file system path to the Schematron rules.
408    * @return Never <code>null</code>.
409    */
410   @Nonnull
411   public static SchematronResourcePure fromFile (@Nonnull @Nonempty final String sSCHPath)
412   {
413     return new SchematronResourcePure (new FileSystemResource (sSCHPath));
414   }
415 
416   /**
417    * Create a new {@link SchematronResourcePure} from file system Schematron
418    * rules
419    *
420    * @param aSCHFile
421    *        The file system path to the Schematron rules.
422    * @return Never <code>null</code>.
423    */
424   @Nonnull
425   public static SchematronResourcePure fromFile (@Nonnull final File aSCHFile)
426   {
427     return new SchematronResourcePure (new FileSystemResource (aSCHFile));
428   }
429 
430   /**
431    * Create a new {@link SchematronResourcePure} from Schematron rules provided
432    * at a URL
433    *
434    * @param sSCHURL
435    *        The URL to the Schematron rules. May neither be <code>null</code>
436    *        nor empty.
437    * @return Never <code>null</code>.
438    * @throws MalformedURLException
439    *         In case an invalid URL is provided
440    */
441   @Nonnull
442   public static SchematronResourcePure fromURL (@Nonnull @Nonempty final String sSCHURL) throws MalformedURLException
443   {
444     return new SchematronResourcePure (new URLResource (sSCHURL));
445   }
446 
447   /**
448    * Create a new {@link SchematronResourcePure} from Schematron rules provided
449    * at a URL
450    *
451    * @param aSCHURL
452    *        The URL to the Schematron rules. May not be <code>null</code>.
453    * @return Never <code>null</code>.
454    */
455   @Nonnull
456   public static SchematronResourcePure fromURL (@Nonnull final URL aSCHURL)
457   {
458     return new SchematronResourcePure (new URLResource (aSCHURL));
459   }
460 
461   /**
462    * Create a new {@link SchematronResourcePure} from Schematron rules provided
463    * by an arbitrary {@link InputStream}.<br>
464    * <b>Important:</b> in this case, no include resolution will be performed!!
465    *
466    * @param aSchematronIS
467    *        The {@link InputStream} to read the Schematron rules from. May not
468    *        be <code>null</code>.
469    * @return Never <code>null</code>.
470    */
471   @Nonnull
472   public static SchematronResourcePure fromInputStream (@Nonnull final InputStream aSchematronIS)
473   {
474     return new SchematronResourcePure (new ReadableResourceInputStream (aSchematronIS));
475   }
476 
477   /**
478    * Create a new {@link SchematronResourcePure} from Schematron rules provided
479    * by an arbitrary byte array.<br>
480    * <b>Important:</b> in this case, no include resolution will be performed!!
481    *
482    * @param aSchematron
483    *        The byte array representing the Schematron. May not be
484    *        <code>null</code>.
485    * @return Never <code>null</code>.
486    */
487   @Nonnull
488   public static SchematronResourcePure fromByteArray (@Nonnull final byte [] aSchematron)
489   {
490     return new SchematronResourcePure (new ReadableResourceByteArray (aSchematron));
491   }
492 
493   /**
494    * Create a new {@link SchematronResourcePure} from Schematron rules provided
495    * by an arbitrary String.<br>
496    * <b>Important:</b> in this case, no include resolution will be performed!!
497    *
498    * @param sSchematron
499    *        The String representing the Schematron. May not be <code>null</code>
500    *        .
501    * @param aCharset
502    *        The charset to be used to convert the String to a byte array.
503    * @return Never <code>null</code>.
504    */
505   @Nonnull
506   public static SchematronResourcePure fromString (@Nonnull final String sSchematron, @Nonnull final Charset aCharset)
507   {
508     return fromByteArray (CharsetManager.getAsBytes (sSchematron, aCharset));
509   }
510 
511   /**
512    * Create a new {@link SchematronResourcePure} from Schematron rules provided
513    * by a domain model.<br>
514    * <b>Important:</b> in this case, no include resolution will be performed!!
515    *
516    * @param aSchematron
517    *        The Schematron model to be used. May not be <code>null</code> .
518    * @return Never <code>null</code>.
519    */
520   @Nonnull
521   public static SchematronResourcePure fromSchema (@Nonnull final PSSchema aSchematron)
522   {
523     return fromString (new PSWriter ().getXMLString (aSchematron), XMLWriterSettings.DEFAULT_XML_CHARSET_OBJ);
524   }
525 }