View Javadoc
1   /**
2    * Copyright (C) 2014-2015 Philip Helger (www.helger.com)
3    * philip[at]helger[dot]com
4    *
5    * Licensed under the Apache License, Version 2.0 (the "License");
6    * you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    *         http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package com.helger.schematron;
18  
19  import java.io.IOException;
20  
21  import javax.annotation.Nonnull;
22  import javax.annotation.Nullable;
23  import javax.annotation.concurrent.Immutable;
24  import javax.xml.transform.Source;
25  import javax.xml.transform.dom.DOMSource;
26  
27  import org.oclc.purl.dsdl.svrl.SchematronOutputType;
28  import org.slf4j.Logger;
29  import org.slf4j.LoggerFactory;
30  import org.w3c.dom.Node;
31  
32  import com.helger.commons.ValueEnforcer;
33  import com.helger.commons.annotation.PresentForCodeCoverage;
34  import com.helger.commons.error.IResourceErrorGroup;
35  import com.helger.commons.error.ResourceErrorGroup;
36  import com.helger.commons.hierarchy.visit.DefaultHierarchyVisitorCallback;
37  import com.helger.commons.hierarchy.visit.EHierarchyVisitorReturn;
38  import com.helger.commons.io.resource.IReadableResource;
39  import com.helger.commons.microdom.IMicroDocument;
40  import com.helger.commons.microdom.IMicroElement;
41  import com.helger.commons.microdom.IMicroNode;
42  import com.helger.commons.microdom.serialize.MicroReader;
43  import com.helger.commons.microdom.util.MicroVisitor;
44  import com.helger.commons.state.ESuccess;
45  import com.helger.commons.wrapper.Wrapper;
46  import com.helger.commons.xml.serialize.read.ISAXReaderSettings;
47  import com.helger.schematron.pure.errorhandler.IPSErrorHandler;
48  import com.helger.schematron.pure.errorhandler.LoggingPSErrorHandler;
49  import com.helger.schematron.resolve.DefaultSchematronIncludeResolver;
50  import com.helger.schematron.svrl.SVRLFailedAssert;
51  import com.helger.schematron.svrl.SVRLHelper;
52  import com.helger.schematron.svrl.SVRLResourceError;
53  
54  import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
55  
56  /**
 * This is a helper class that provides a way to easily apply a Schematron
 * resource on an XML resource.
59   *
60   * @author Philip Helger
61   */
62  @Immutable
63  public final class SchematronHelper
64  {
65    private static final Logger s_aLogger = LoggerFactory.getLogger (SchematronHelper.class);
66  
67    @PresentForCodeCoverage
68    private static final SchematronHelper s_aInstance = new SchematronHelper ();
69  
70    private SchematronHelper ()
71    {}
72  
73    /**
74     * Apply the passed schematron on the passed XML resource using a custom error
75     * handler.
76     *
77     * @param aSchematron
78     *        The Schematron resource. May not be <code>null</code>.
79     * @param aXML
80     *        The XML resource. May not be <code>null</code>.
81     * @return <code>null</code> if either the Schematron or the XML could not be
82     *         read.
83     * @throws IllegalStateException
84     *         if the processing throws an unexpected exception.
85     */
86    @Nullable
87    public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron,
88                                                        @Nonnull final IReadableResource aXML)
89    {
90      ValueEnforcer.notNull (aSchematron, "SchematronResource");
91      ValueEnforcer.notNull (aXML, "XMLSource");
92  
93      try
94      {
95        // Apply Schematron on XML
96        return aSchematron.applySchematronValidationToSVRL (aXML);
97      }
98      catch (final Exception ex)
99      {
100       throw new IllegalArgumentException ("Failed to apply Schematron " +
101                                           aSchematron.getID () +
102                                           " onto XML resource " +
103                                           aXML.getResourceID (),
104                                           ex);
105     }
106   }
107 
108   /**
109    * Apply the passed schematron on the passed XML resource.
110    *
111    * @param aSchematron
112    *        The Schematron resource. May not be <code>null</code>.
113    * @param aXML
114    *        The XML resource. May not be <code>null</code>.
115    * @return <code>null</code> if either the Schematron or the XML could not be
116    *         read.
117    * @throws IllegalStateException
118    *         if the processing throws an unexpected exception.
119    */
120   @Nullable
121   public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron,
122                                                       @Nonnull final Source aXML)
123   {
124     ValueEnforcer.notNull (aSchematron, "SchematronResource");
125     ValueEnforcer.notNull (aXML, "XMLSource");
126 
127     try
128     {
129       // Apply Schematron on XML.
130       return aSchematron.applySchematronValidationToSVRL (aXML);
131     }
132     catch (final Exception ex)
133     {
134       throw new IllegalArgumentException ("Failed to apply Schematron " +
135                                           aSchematron.getID () +
136                                           " onto XML source " +
137                                           aXML,
138                                           ex);
139     }
140   }
141 
142   /**
143    * Apply the passed schematron on the passed XML node.
144    *
145    * @param aSchematron
146    *        The Schematron resource. May not be <code>null</code>.
147    * @param aNode
148    *        The XML node. May not be <code>null</code>.
149    * @return <code>null</code> if either the Schematron or the XML could not be
150    *         read.
151    * @throws IllegalStateException
152    *         if the processing throws an unexpected exception.
153    */
154   @Nullable
155   public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron,
156                                                       @Nonnull final Node aNode)
157   {
158     ValueEnforcer.notNull (aSchematron, "SchematronResource");
159     ValueEnforcer.notNull (aNode, "Node");
160 
161     return applySchematron (aSchematron, new DOMSource (aNode));
162   }
163 
164   /**
165    * Convert a {@link SchematronOutputType} to an {@link IResourceErrorGroup}.
166    *
167    * @param aSchematronOutput
168    *        The result of Schematron validation
169    * @param sResourceName
170    *        The name of the resource that was validated (may be a file path
171    *        etc.)
172    * @return List non-<code>null</code> error list of {@link SVRLResourceError}
173    *         objects.
174    */
175   @Nonnull
176   public static IResourceErrorGroup convertToResourceErrorGroup (@Nonnull final SchematronOutputType aSchematronOutput,
177                                                                  @Nullable final String sResourceName)
178   {
179     ValueEnforcer.notNull (aSchematronOutput, "SchematronOutput");
180 
181     final ResourceErrorGroup ret = new ResourceErrorGroup ();
182     for (final SVRLFailedAssert aFailedAssert : SVRLHelper.getAllFailedAssertions (aSchematronOutput))
183       ret.addResourceError (aFailedAssert.getAsResourceError (sResourceName));
184     return ret;
185   }
186 
187   @SuppressFBWarnings ("RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
188   @Nonnull
189   private static ESuccess _recursiveResolveAllSchematronIncludes (@Nonnull final IMicroElement eRoot,
190                                                                   @Nonnull final IReadableResource aResource,
191                                                                   @Nullable final ISAXReaderSettings aSettings,
192                                                                   @Nonnull final IPSErrorHandler aErrorHandler)
193   {
194     if (eRoot != null)
195     {
196       final DefaultSchematronIncludeResolver aIncludeResolver = new DefaultSchematronIncludeResolver (aResource);
197 
198       for (final IMicroElement aElement : eRoot.getAllChildElementsRecursive ())
199         if (CSchematron.NAMESPACE_SCHEMATRON.equals (aElement.getNamespaceURI ()) &&
200             aElement.getLocalName ().equals (CSchematronXML.ELEMENT_INCLUDE))
201         {
202           String sHref = aElement.getAttributeValue (CSchematronXML.ATTR_HREF);
203           try
204           {
205             final int nHashIndex = sHref.indexOf ('#');
206             String sAnchor = null;
207             if (nHashIndex >= 0)
208             {
209               sAnchor = sHref.substring (nHashIndex + 1);
210               sHref = sHref.substring (0, nHashIndex);
211             }
212 
213             final IReadableResource aIncludeRes = aIncludeResolver.getResolvedSchematronResource (sHref);
214             if (aIncludeRes == null)
215             {
216               aErrorHandler.error (aResource, null, "Failed to resolve include '" + sHref + "'", null);
217               return ESuccess.FAILURE;
218             }
219 
220             if (s_aLogger.isDebugEnabled ())
221               s_aLogger.debug ("Resolved '" +
222                                sHref +
223                                "' relative to '" +
224                                aIncludeResolver.getBaseHref () +
225                                "' as '" +
226                                aIncludeRes.getPath () +
227                                "'");
228 
229             // Read XML to be included
230             final IMicroDocument aIncludedDoc = MicroReader.readMicroXML (aIncludeRes, aSettings);
231             if (aIncludedDoc == null)
232             {
233               aErrorHandler.error (aResource, null, "Failed to parse include " + aIncludeRes, null);
234               return ESuccess.FAILURE;
235             }
236 
237             IMicroElement aIncludedContent;
238             if (sAnchor == null)
239             {
240               // no anchor present - include the whole document
241 
242               // Return the document element
243               aIncludedContent = aIncludedDoc.getDocumentElement ();
244             }
245             else
246             {
247               final String sFinalAnchor = sAnchor;
248               final Wrapper <IMicroElement> aMatch = new Wrapper <IMicroElement> ();
249               MicroVisitor.visit (aIncludedDoc.getDocumentElement (),
250                                   new DefaultHierarchyVisitorCallback <IMicroNode> ()
251                                   {
252                                     @Override
253                                     public EHierarchyVisitorReturn onItemBeforeChildren (final IMicroNode aItem)
254                                     {
255                                       if (aItem.isElement ())
256                                       {
257                                         final IMicroElement aCurElement = (IMicroElement) aItem;
258                                         final String sID = aCurElement.getAttributeValue ("id");
259                                         if (sFinalAnchor.equals (sID))
260                                           aMatch.set (aCurElement);
261                                       }
262                                       return EHierarchyVisitorReturn.CONTINUE;
263                                     }
264                                   });
265               aIncludedContent = aMatch.get ();
266               if (aIncludedContent == null)
267               {
268                 aErrorHandler.warn (aResource,
269                                     null,
270                                     "Failed to resolve an element with the ID '" +
271                                           sAnchor +
272                                           "' in " +
273                                           aIncludeRes +
274                                           "! Therefore including the whole document!");
275                 aIncludedContent = aIncludedDoc.getDocumentElement ();
276               }
277             }
278 
279             // Important to detach from parent!
280             aIncludedContent.detachFromParent ();
281 
282             // Check for correct namespace URI of included content
283             if (!CSchematron.NAMESPACE_SCHEMATRON.equals (aIncludedContent.getNamespaceURI ()))
284             {
285               aErrorHandler.error (aResource,
286                                    null,
287                                    "The included resource " +
288                                          aIncludeRes +
289                                          " contains the wrong XML namespace URI '" +
290                                          aIncludedContent.getNamespaceURI () +
291                                          "' but was expected to have '" +
292                                          CSchematron.NAMESPACE_SCHEMATRON +
293                                          "'",
294                                    null);
295               return ESuccess.FAILURE;
296             }
297 
298             // Check that not a whole Schema but only a part is included
299             if (CSchematronXML.ELEMENT_SCHEMA.equals (aIncludedContent.getLocalName ()))
300             {
301               aErrorHandler.warn (aResource,
302                                   null,
303                                   "The included resource " +
304                                         aIncludeRes +
305                                         " seems to be a complete schema. To includes parts of a schema the respective element must be the root element of the included resource.");
306             }
307 
308             // Recursive resolve includes
309             if (_recursiveResolveAllSchematronIncludes (aIncludedContent,
310                                                         aIncludeRes,
311                                                         aSettings,
312                                                         aErrorHandler).isFailure ())
313               return ESuccess.FAILURE;
314 
315             // Now replace "include" element with content in MicroDOM
316             aElement.getParent ().replaceChild (aElement, aIncludedContent);
317           }
318           catch (final IOException ex)
319           {
320             aErrorHandler.error (aResource, null, "Failed to read include '" + sHref + "'", ex);
321             return ESuccess.FAILURE;
322           }
323         }
324     }
325     return ESuccess.SUCCESS;
326   }
327 
328   /**
329    * Resolve all Schematron includes of the passed resource.
330    *
331    * @param aResource
332    *        The Schematron resource to read. May not be <code>null</code>.
333    * @return <code>null</code> if the passed resource could not be read as XML
334    *         document
335    */
336   @Nullable
337   public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource)
338   {
339     return getWithResolvedSchematronIncludes (aResource, (ISAXReaderSettings) null, new LoggingPSErrorHandler ());
340   }
341 
342   /**
343    * Resolve all Schematron includes of the passed resource.
344    *
345    * @param aResource
346    *        The Schematron resource to read. May not be <code>null</code>.
347    * @param aSettings
348    *        The SAX reader settings to be used. May be <code>null</code> to use
349    *        the default settings.
350    * @return <code>null</code> if the passed resource could not be read as XML
351    *         document
352    */
353   @Nullable
354   @Deprecated
355   public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource,
356                                                                   @Nullable final ISAXReaderSettings aSettings)
357   {
358     return getWithResolvedSchematronIncludes (aResource, aSettings, new LoggingPSErrorHandler ());
359   }
360 
361   /**
362    * Resolve all Schematron includes of the passed resource.
363    *
364    * @param aResource
365    *        The Schematron resource to read. May not be <code>null</code>.
366    * @param aSettings
367    *        The SAX reader settings to be used. May be <code>null</code> to use
368    *        the default settings.
369    * @param aErrorHandler
370    *        The error handler to be used. May not be <code>null</code>.
371    * @return <code>null</code> if the passed resource could not be read as XML
372    *         document
373    */
374   @Nullable
375   public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource,
376                                                                   @Nullable final ISAXReaderSettings aSettings,
377                                                                   @Nonnull final IPSErrorHandler aErrorHandler)
378   {
379     final IMicroDocument aDoc = MicroReader.readMicroXML (aResource, aSettings);
380     if (aDoc != null)
381     {
382       // Resolve all Schematron includes
383       if (_recursiveResolveAllSchematronIncludes (aDoc.getDocumentElement (),
384                                                   aResource,
385                                                   aSettings,
386                                                   aErrorHandler).isFailure ())
387       {
388         // Error resolving includes
389         return null;
390       }
391     }
392     return aDoc;
393   }
394 }