View Javadoc
1   /**
2    * Copyright (C) 2014-2018 Philip Helger (www.helger.com)
3    * philip[at]helger[dot]com
4    *
5    * Licensed under the Apache License, Version 2.0 (the "License");
6    * you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    *         http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package com.helger.schematron.pure;
18  
19  import java.io.File;
20  import java.io.InputStream;
21  import java.net.MalformedURLException;
22  import java.net.URL;
23  import java.nio.charset.Charset;
24  
25  import javax.annotation.Nonnull;
26  import javax.annotation.Nullable;
27  import javax.annotation.concurrent.NotThreadSafe;
28  import javax.xml.xpath.XPathFunctionResolver;
29  import javax.xml.xpath.XPathVariableResolver;
30  
31  import org.oclc.purl.dsdl.svrl.SchematronOutputType;
32  import org.slf4j.Logger;
33  import org.slf4j.LoggerFactory;
34  import org.w3c.dom.Document;
35  import org.w3c.dom.Node;
36  import org.xml.sax.EntityResolver;
37  
38  import com.helger.commons.ValueEnforcer;
39  import com.helger.commons.annotation.Nonempty;
40  import com.helger.commons.io.resource.ClassPathResource;
41  import com.helger.commons.io.resource.FileSystemResource;
42  import com.helger.commons.io.resource.IReadableResource;
43  import com.helger.commons.io.resource.URLResource;
44  import com.helger.commons.io.resource.inmemory.AbstractMemoryReadableResource;
45  import com.helger.commons.io.resource.inmemory.ReadableResourceByteArray;
46  import com.helger.commons.io.resource.inmemory.ReadableResourceInputStream;
47  import com.helger.commons.state.EValidity;
48  import com.helger.schematron.AbstractSchematronResource;
49  import com.helger.schematron.SchematronDebug;
50  import com.helger.schematron.SchematronException;
51  import com.helger.schematron.pure.bound.IPSBoundSchema;
52  import com.helger.schematron.pure.bound.PSBoundSchemaCache;
53  import com.helger.schematron.pure.bound.PSBoundSchemaCacheKey;
54  import com.helger.schematron.pure.errorhandler.DoNothingPSErrorHandler;
55  import com.helger.schematron.pure.errorhandler.IPSErrorHandler;
56  import com.helger.schematron.pure.exchange.PSWriter;
57  import com.helger.schematron.pure.model.PSSchema;
58  import com.helger.schematron.svrl.SVRLMarshaller;
59  import com.helger.xml.serialize.write.XMLWriterSettings;
60  
61  /**
62   * A Schematron resource that is not XSLT based but using the pure (native Java)
63   * implementation. This class itself is not thread safe, but the underlying
64   * cache is thread safe. So once you configured this object fully (with all the
65   * setter), it can be considered thread safe.<br>
66   * <b>Important:</b> This class can <u>only</u> handle XPath expressions but no
67   * XSLT functions in Schematron asserts and reports! If your Schematrons use
68   * XSLT functionality you're better off using the
69   * {@link com.helger.schematron.xslt.SchematronResourceSCH} or
70   * {@link com.helger.schematron.xslt.SchematronResourceXSLT} classes instead!
71   *
72   * @author Philip Helger
73   */
74  @NotThreadSafe
75  public class SchematronResourcePure extends AbstractSchematronResource
76  {
77    private static final Logger LOGGER = LoggerFactory.getLogger (SchematronResourcePure.class);
78  
79    private String m_sPhase;
80    private IPSErrorHandler m_aErrorHandler;
81    private XPathVariableResolver m_aVariableResolver;
82    private XPathFunctionResolver m_aFunctionResolver;
83    // Status var
84    private IPSBoundSchema m_aBoundSchema;
85  
86    public SchematronResourcePure (@Nonnull final IReadableResource aResource)
87    {
88      this (aResource, (String) null, (IPSErrorHandler) null);
89    }
90  
91    public SchematronResourcePure (@Nonnull final IReadableResource aResource,
92                                   @Nullable final String sPhase,
93                                   @Nullable final IPSErrorHandler aErrorHandler)
94    {
95      super (aResource);
96      setPhase (sPhase);
97      setErrorHandler (aErrorHandler);
98    }
99  
100   /**
101    * @return The phase to be used. May be <code>null</code>.
102    */
103   @Nullable
104   public String getPhase ()
105   {
106     return m_sPhase;
107   }
108 
109   /**
110    * Set the Schematron phase to be evaluated. Changing the phase will result in
111    * a newly bound schema!
112    *
113    * @param sPhase
114    *        The name of the phase to use. May be <code>null</code> which means
115    *        all phases.
116    * @return this
117    */
118   @Nonnull
119   public SchematronResourcePure setPhase (@Nullable final String sPhase)
120   {
121     if (m_aBoundSchema != null)
122       throw new IllegalStateException ("Schematron was already bound and can therefore not be altered!");
123     m_sPhase = sPhase;
124     return this;
125   }
126 
127   /**
128    * @return The error handler to be used to bind the schema. May be
129    *         <code>null</code>.
130    */
131   @Nullable
132   public IPSErrorHandler getErrorHandler ()
133   {
134     return m_aErrorHandler;
135   }
136 
137   /**
138    * Set the error handler to be used during binding.
139    *
140    * @param aErrorHandler
141    *        The error handler. May be <code>null</code>.
142    * @return this
143    */
144   @Nonnull
145   public SchematronResourcePure setErrorHandler (@Nullable final IPSErrorHandler aErrorHandler)
146   {
147     if (m_aBoundSchema != null)
148       throw new IllegalStateException ("Schematron was already bound and can therefore not be altered!");
149     m_aErrorHandler = aErrorHandler;
150     return this;
151   }
152 
153   /**
154    * @return The variable resolver to be used. May be <code>null</code>.
155    */
156   @Nullable
157   public XPathVariableResolver getVariableResolver ()
158   {
159     return m_aVariableResolver;
160   }
161 
162   /**
163    * Set the variable resolver to be used in the XPath statements. This can only
164    * be set before the Schematron is bound. If it is already bound an exception
165    * is thrown to indicate the unnecessity of the call.
166    *
167    * @param aVariableResolver
168    *        The variable resolver to set. May be <code>null</code>.
169    * @return this
170    */
171   @Nonnull
172   public SchematronResourcePure setVariableResolver (@Nullable final XPathVariableResolver aVariableResolver)
173   {
174     if (m_aBoundSchema != null)
175       throw new IllegalStateException ("Schematron was already bound and can therefore not be altered!");
176     m_aVariableResolver = aVariableResolver;
177     return this;
178   }
179 
180   /**
181    * @return The function resolver to be used. May be <code>null</code>.
182    */
183   @Nullable
184   public XPathFunctionResolver getFunctionResolver ()
185   {
186     return m_aFunctionResolver;
187   }
188 
189   /**
190    * Set the function resolver to be used in the XPath statements. This can only
191    * be set before the Schematron is bound. If it is already bound an exception
192    * is thrown to indicate the unnecessity of the call.
193    *
194    * @param aFunctionResolver
195    *        The function resolver to set. May be <code>null</code>.
196    * @return this
197    */
198   @Nonnull
199   public SchematronResourcePure setFunctionResolver (@Nullable final XPathFunctionResolver aFunctionResolver)
200   {
201     if (m_aBoundSchema != null)
202       throw new IllegalStateException ("Schematron was already bound and can therefore not be altered!");
203     m_aFunctionResolver = aFunctionResolver;
204     return this;
205   }
206 
207   /**
208    * Set the XML entity resolver to be used when reading the Schematron or the
209    * XML to be validated. This can only be set before the Schematron is bound.
210    * If it is already bound an exception is thrown to indicate the unnecessity
211    * of the call.
212    *
213    * @param aEntityResolver
214    *        The entity resolver to set. May be <code>null</code>.
215    * @return this
216    * @since 4.1.1
217    */
218   @Nonnull
219   public SchematronResourcePure setEntityResolver (@Nullable final EntityResolver aEntityResolver)
220   {
221     if (m_aBoundSchema != null)
222       throw new IllegalStateException ("Schematron was already bound and can therefore not be altered!");
223     internalSetEntityResolver (aEntityResolver);
224     return this;
225   }
226 
227   @Nonnull
228   protected IPSBoundSchema createBoundSchema ()
229   {
230     final IReadableResource aResource = getResource ();
231     final IPSErrorHandler aErrorHandler = getErrorHandler ();
232     final PSBoundSchemaCacheKeyCacheKey.html#PSBoundSchemaCacheKey">PSBoundSchemaCacheKey aCacheKey = new PSBoundSchemaCacheKey (aResource,
233                                                                        getPhase (),
234                                                                        aErrorHandler,
235                                                                        getVariableResolver (),
236                                                                        getFunctionResolver (),
237                                                                        getEntityResolver ());
238     if (aResource instanceof AbstractMemoryReadableResource || !isUseCache ())
239     {
240       // No need to cache anything for memory resources
241       try
242       {
243         return aCacheKey.createBoundSchema ();
244       }
245       catch (final SchematronException ex)
246       {
247         // Convert to runtime exception
248         throw new IllegalStateException ("Failed to bind Schematron", ex);
249       }
250     }
251 
252     // Resolve from cache - inside the cacheKey the reading and binding
253     // happens
254     return PSBoundSchemaCache.getInstance ().getFromCache (aCacheKey);
255   }
256 
257   /**
258    * Get the cached bound schema or create a new one.
259    *
260    * @return The bound schema. Never <code>null</code>.
261    */
262   @Nonnull
263   public IPSBoundSchema getOrCreateBoundSchema ()
264   {
265     if (m_aBoundSchema == null)
266       try
267       {
268         m_aBoundSchema = createBoundSchema ();
269       }
270       catch (final RuntimeException ex)
271       {
272         if (m_aErrorHandler != null)
273           m_aErrorHandler.error (getResource (), null, "Error creating bound schema", ex);
274         throw ex;
275       }
276 
277     return m_aBoundSchema;
278   }
279 
280   public boolean isValidSchematron ()
281   {
282     // Use the provided error handler (if any)
283     try
284     {
285       final IPSErrorHandler aErrorHandler = m_aErrorHandler != null ? m_aErrorHandler : new DoNothingPSErrorHandler ();
286       return getOrCreateBoundSchema ().getOriginalSchema ().isValid (aErrorHandler);
287     }
288     catch (final RuntimeException ex)
289     {
290       // May happen when XPath errors are contained
291       return false;
292     }
293   }
294 
295   /**
296    * Use the internal error handler to validate all elements in the schematron.
297    * It tries to catch as many errors as possible.
298    */
299   public void validateCompletely ()
300   {
301     // Use the provided error handler (if any)
302     final IPSErrorHandler aErrorHandler = m_aErrorHandler != null ? m_aErrorHandler : new DoNothingPSErrorHandler ();
303     validateCompletely (aErrorHandler);
304   }
305 
306   /**
307    * Use the provided error handler to validate all elements in the schematron.
308    * It tries to catch as many errors as possible.
309    *
310    * @param aErrorHandler
311    *        The error handler to use. May not be <code>null</code>.
312    */
313   public void validateCompletely (@Nonnull final IPSErrorHandler aErrorHandler)
314   {
315     ValueEnforcer.notNull (aErrorHandler, "ErrorHandler");
316 
317     try
318     {
319       getOrCreateBoundSchema ().getOriginalSchema ().validateCompletely (aErrorHandler);
320     }
321     catch (final RuntimeException ex)
322     {
323       // May happen when XPath errors are contained
324     }
325   }
326 
327   @Nonnull
328   public EValidity getSchematronValidity (@Nonnull final Node aXMLNode,
329                                           @Nullable final String sBaseURI) throws Exception
330   {
331     ValueEnforcer.notNull (aXMLNode, "XMLNode");
332 
333     if (!isValidSchematron ())
334       return EValidity.INVALID;
335 
336     return getOrCreateBoundSchema ().validatePartially (aXMLNode, sBaseURI);
337   }
338 
339   /**
340    * The main method to convert a node to an SVRL document.
341    *
342    * @param aXMLNode
343    *        The source node to be validated. May not be <code>null</code>.
344    * @param sBaseURI
345    *        Base URI of the XML document to be validated. May be
346    *        <code>null</code>.
347    * @return The SVRL document. Never <code>null</code>.
348    * @throws SchematronException
349    *         in case of a sever error validating the schema
350    */
351   @Nonnull
352   public SchematronOutputType applySchematronValidationToSVRL (@Nonnull final Node aXMLNode,
353                                                                @Nullable final String sBaseURI) throws SchematronException
354   {
355     ValueEnforcer.notNull (aXMLNode, "XMLNode");
356 
357     final SchematronOutputType aSOT = getOrCreateBoundSchema ().validateComplete (aXMLNode, sBaseURI);
358 
359     // Debug print the created SVRL document
360     if (SchematronDebug.isShowCreatedSVRL ())
361       LOGGER.info ("Created SVRL:\n" + new SVRLMarshaller (false).getAsString (aSOT));
362 
363     return aSOT;
364   }
365 
366   @Nullable
367   public Document applySchematronValidation (@Nonnull final Node aXMLNode,
368                                              @Nullable final String sBaseURI) throws Exception
369   {
370     ValueEnforcer.notNull (aXMLNode, "XMLNode");
371 
372     final SchematronOutputType aSO = applySchematronValidationToSVRL (aXMLNode, sBaseURI);
373     return new SVRLMarshaller ().getAsDocument (aSO);
374   }
375 
376   /**
377    * Create a new {@link SchematronResourcePure} from a Classpath Schematron
378    * rules
379    *
380    * @param sSCHPath
381    *        The classpath relative path to the Schematron rules.
382    * @return Never <code>null</code>.
383    */
384   @Nonnull
385   public static SchematronResourcePure fromClassPath (@Nonnull @Nonempty final String sSCHPath)
386   {
387     return new SchematronResourcePure (new ClassPathResource (sSCHPath));
388   }
389 
390   /**
391    * Create a new {@link SchematronResourcePure} from file system Schematron
392    * rules
393    *
394    * @param sSCHPath
395    *        The file system path to the Schematron rules.
396    * @return Never <code>null</code>.
397    */
398   @Nonnull
399   public static SchematronResourcePure fromFile (@Nonnull @Nonempty final String sSCHPath)
400   {
401     return new SchematronResourcePure (new FileSystemResource (sSCHPath));
402   }
403 
404   /**
405    * Create a new {@link SchematronResourcePure} from file system Schematron
406    * rules
407    *
408    * @param aSCHFile
409    *        The file system path to the Schematron rules.
410    * @return Never <code>null</code>.
411    */
412   @Nonnull
413   public static SchematronResourcePure fromFile (@Nonnull final File aSCHFile)
414   {
415     return new SchematronResourcePure (new FileSystemResource (aSCHFile));
416   }
417 
418   /**
419    * Create a new {@link SchematronResourcePure} from Schematron rules provided
420    * at a URL
421    *
422    * @param sSCHURL
423    *        The URL to the Schematron rules. May neither be <code>null</code>
424    *        nor empty.
425    * @return Never <code>null</code>.
426    * @throws MalformedURLException
427    *         In case an invalid URL is provided
428    */
429   @Nonnull
430   public static SchematronResourcePure fromURL (@Nonnull @Nonempty final String sSCHURL) throws MalformedURLException
431   {
432     return new SchematronResourcePure (new URLResource (sSCHURL));
433   }
434 
435   /**
436    * Create a new {@link SchematronResourcePure} from Schematron rules provided
437    * at a URL
438    *
439    * @param aSCHURL
440    *        The URL to the Schematron rules. May not be <code>null</code>.
441    * @return Never <code>null</code>.
442    */
443   @Nonnull
444   public static SchematronResourcePure fromURL (@Nonnull final URL aSCHURL)
445   {
446     return new SchematronResourcePure (new URLResource (aSCHURL));
447   }
448 
449   /**
450    * Create a new {@link SchematronResourcePure} from Schematron rules provided
451    * by an arbitrary {@link InputStream}.<br>
452    * <b>Important:</b> in this case, no include resolution will be performed!!
453    *
454    * @param aSchematronIS
455    *        The {@link InputStream} to read the Schematron rules from. May not
456    *        be <code>null</code>.
457    * @return Never <code>null</code>.
458    */
459   @Nonnull
460   public static SchematronResourcePure fromInputStream (@Nonnull final InputStream aSchematronIS)
461   {
462     return new SchematronResourcePure (new ReadableResourceInputStream (aSchematronIS));
463   }
464 
465   /**
466    * Create a new {@link SchematronResourcePure} from Schematron rules provided
467    * by an arbitrary byte array.<br>
468    * <b>Important:</b> in this case, no include resolution will be performed!!
469    *
470    * @param aSchematron
471    *        The byte array representing the Schematron. May not be
472    *        <code>null</code>.
473    * @return Never <code>null</code>.
474    */
475   @Nonnull
476   public static SchematronResourcePure fromByteArray (@Nonnull final byte [] aSchematron)
477   {
478     return new SchematronResourcePure (new ReadableResourceByteArray (aSchematron));
479   }
480 
481   /**
482    * Create a new {@link SchematronResourcePure} from Schematron rules provided
483    * by an arbitrary String.<br>
484    * <b>Important:</b> in this case, no include resolution will be performed!!
485    *
486    * @param sSchematron
487    *        The String representing the Schematron. May not be <code>null</code>
488    *        .
489    * @param aCharset
490    *        The charset to be used to convert the String to a byte array.
491    * @return Never <code>null</code>.
492    */
493   @Nonnull
494   public static SchematronResourcePure fromString (@Nonnull final String sSchematron, @Nonnull final Charset aCharset)
495   {
496     return fromByteArray (sSchematron.getBytes (aCharset));
497   }
498 
499   /**
500    * Create a new {@link SchematronResourcePure} from Schematron rules provided
501    * by a domain model.<br>
502    * <b>Important:</b> in this case, no include resolution will be performed!!
503    *
504    * @param aSchematron
505    *        The Schematron model to be used. May not be <code>null</code> .
506    * @return Never <code>null</code>.
507    */
508   @Nonnull
509   public static SchematronResourcePure fromSchema (@Nonnull final PSSchema aSchematron)
510   {
511     return fromString (new PSWriter ().getXMLString (aSchematron), XMLWriterSettings.DEFAULT_XML_CHARSET_OBJ);
512   }
513 }