View Javadoc
1   /**
2    * Copyright (C) 2014-2017 Philip Helger (www.helger.com)
3    * philip[at]helger[dot]com
4    *
5    * Licensed under the Apache License, Version 2.0 (the "License");
6    * you may not use this file except in compliance with the License.
7    * You may obtain a copy of the License at
8    *
9    *         http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package com.helger.schematron;
18  
19  import java.io.IOException;
20  
21  import javax.annotation.Nonnull;
22  import javax.annotation.Nullable;
23  import javax.annotation.concurrent.Immutable;
24  import javax.xml.transform.Source;
25  import javax.xml.transform.dom.DOMSource;
26  
27  import org.oclc.purl.dsdl.svrl.SchematronOutputType;
28  import org.slf4j.Logger;
29  import org.slf4j.LoggerFactory;
30  import org.w3c.dom.Node;
31  import org.xml.sax.InputSource;
32  
33  import com.helger.commons.ValueEnforcer;
34  import com.helger.commons.annotation.PresentForCodeCoverage;
35  import com.helger.commons.error.list.ErrorList;
36  import com.helger.commons.error.list.IErrorList;
37  import com.helger.commons.hierarchy.visit.ChildrenProviderHierarchyVisitor;
38  import com.helger.commons.hierarchy.visit.DefaultHierarchyVisitorCallback;
39  import com.helger.commons.hierarchy.visit.EHierarchyVisitorReturn;
40  import com.helger.commons.io.resource.IReadableResource;
41  import com.helger.commons.state.ESuccess;
42  import com.helger.commons.wrapper.Wrapper;
43  import com.helger.schematron.pure.errorhandler.IPSErrorHandler;
44  import com.helger.schematron.pure.errorhandler.LoggingPSErrorHandler;
45  import com.helger.schematron.resolve.DefaultSchematronIncludeResolver;
46  import com.helger.schematron.svrl.SVRLFailedAssert;
47  import com.helger.schematron.svrl.SVRLHelper;
48  import com.helger.schematron.svrl.SVRLResourceError;
49  import com.helger.xml.microdom.IMicroDocument;
50  import com.helger.xml.microdom.IMicroElement;
51  import com.helger.xml.microdom.IMicroNode;
52  import com.helger.xml.microdom.serialize.MicroReader;
53  import com.helger.xml.sax.InputSourceFactory;
54  import com.helger.xml.serialize.read.ISAXReaderSettings;
55  
56  import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
57  
58  /**
59   * This is a helper class that provides a way to easily apply an Schematron
60   * resource on an XML resource.
61   *
62   * @author Philip Helger
63   */
64  @Immutable
65  public final class SchematronHelper
66  {
67    private static final Logger s_aLogger = LoggerFactory.getLogger (SchematronHelper.class);
68  
69    @PresentForCodeCoverage
70    private static final SchematronHelper s_aInstance = new SchematronHelper ();
71  
72    private SchematronHelper ()
73    {}
74  
75    /**
76     * Apply the passed schematron on the passed XML resource using a custom error
77     * handler.
78     *
79     * @param aSchematron
80     *        The Schematron resource. May not be <code>null</code>.
81     * @param aXML
82     *        The XML resource. May not be <code>null</code>.
83     * @return <code>null</code> if either the Schematron or the XML could not be
84     *         read.
85     * @throws IllegalStateException
86     *         if the processing throws an unexpected exception.
87     */
88    @Nullable
89    public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron,
90                                                        @Nonnull final IReadableResource aXML)
91    {
92      ValueEnforcer.notNull (aSchematron, "SchematronResource");
93      ValueEnforcer.notNull (aXML, "XMLSource");
94  
95      try
96      {
97        // Apply Schematron on XML
98        return aSchematron.applySchematronValidationToSVRL (aXML);
99      }
100     catch (final Exception ex)
101     {
102       throw new IllegalArgumentException ("Failed to apply Schematron " +
103                                           aSchematron.getID () +
104                                           " onto XML resource " +
105                                           aXML.getResourceID (),
106                                           ex);
107     }
108   }
109 
110   /**
111    * Apply the passed schematron on the passed XML resource.
112    *
113    * @param aSchematron
114    *        The Schematron resource. May not be <code>null</code>.
115    * @param aXML
116    *        The XML resource. May not be <code>null</code>.
117    * @return <code>null</code> if either the Schematron or the XML could not be
118    *         read.
119    * @throws IllegalStateException
120    *         if the processing throws an unexpected exception.
121    */
122   @Nullable
123   public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron,
124                                                       @Nonnull final Source aXML)
125   {
126     ValueEnforcer.notNull (aSchematron, "SchematronResource");
127     ValueEnforcer.notNull (aXML, "XMLSource");
128 
129     try
130     {
131       // Apply Schematron on XML.
132       return aSchematron.applySchematronValidationToSVRL (aXML);
133     }
134     catch (final Exception ex)
135     {
136       throw new IllegalArgumentException ("Failed to apply Schematron " +
137                                           aSchematron.getID () +
138                                           " onto XML source " +
139                                           aXML,
140                                           ex);
141     }
142   }
143 
144   /**
145    * Apply the passed schematron on the passed XML node.
146    *
147    * @param aSchematron
148    *        The Schematron resource. May not be <code>null</code>.
149    * @param aNode
150    *        The XML node. May not be <code>null</code>.
151    * @return <code>null</code> if either the Schematron or the XML could not be
152    *         read.
153    * @throws IllegalStateException
154    *         if the processing throws an unexpected exception.
155    */
156   @Nullable
157   public static SchematronOutputType applySchematron (@Nonnull final ISchematronResource aSchematron,
158                                                       @Nonnull final Node aNode)
159   {
160     ValueEnforcer.notNull (aSchematron, "SchematronResource");
161     ValueEnforcer.notNull (aNode, "Node");
162 
163     return applySchematron (aSchematron, new DOMSource (aNode));
164   }
165 
166   /**
167    * Convert a {@link SchematronOutputType} to an {@link IErrorList}.
168    *
169    * @param aSchematronOutput
170    *        The result of Schematron validation
171    * @param sResourceName
172    *        The name of the resource that was validated (may be a file path
173    *        etc.)
174    * @return List non-<code>null</code> error list of {@link SVRLResourceError}
175    *         objects.
176    */
177   @Nonnull
178   public static IErrorList convertToErrorList (@Nonnull final SchematronOutputType aSchematronOutput,
179                                                @Nullable final String sResourceName)
180   {
181     ValueEnforcer.notNull (aSchematronOutput, "SchematronOutput");
182 
183     final ErrorList ret = new ErrorList ();
184     for (final SVRLFailedAssert aFailedAssert : SVRLHelper.getAllFailedAssertions (aSchematronOutput))
185       ret.add (aFailedAssert.getAsResourceError (sResourceName));
186     return ret;
187   }
188 
189   @SuppressFBWarnings ("RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE")
190   @Nonnull
191   private static ESuccess _recursiveResolveAllSchematronIncludes (@Nonnull final IMicroElement eRoot,
192                                                                   @Nonnull final IReadableResource aResource,
193                                                                   @Nullable final ISAXReaderSettings aSettings,
194                                                                   @Nonnull final IPSErrorHandler aErrorHandler)
195   {
196     if (eRoot != null)
197     {
198       final DefaultSchematronIncludeResolver aIncludeResolver = new DefaultSchematronIncludeResolver (aResource);
199 
200       for (final IMicroElement aElement : eRoot.getAllChildElementsRecursive ())
201         if (CSchematron.NAMESPACE_SCHEMATRON.equals (aElement.getNamespaceURI ()) &&
202             aElement.getLocalName ().equals (CSchematronXML.ELEMENT_INCLUDE))
203         {
204           String sHref = aElement.getAttributeValue (CSchematronXML.ATTR_HREF);
205           try
206           {
207             final int nHashIndex = sHref.indexOf ('#');
208             String sAnchor = null;
209             if (nHashIndex >= 0)
210             {
211               sAnchor = sHref.substring (nHashIndex + 1);
212               sHref = sHref.substring (0, nHashIndex);
213             }
214 
215             final IReadableResource aIncludeRes = aIncludeResolver.getResolvedSchematronResource (sHref);
216             if (aIncludeRes == null)
217             {
218               aErrorHandler.error (aResource, null, "Failed to resolve include '" + sHref + "'", null);
219               return ESuccess.FAILURE;
220             }
221 
222             if (s_aLogger.isDebugEnabled ())
223               s_aLogger.debug ("Resolved '" +
224                                sHref +
225                                "' relative to '" +
226                                aIncludeResolver.getBaseHref () +
227                                "' as '" +
228                                aIncludeRes.getPath () +
229                                "'");
230 
231             // Read XML to be included
232             final IMicroDocument aIncludedDoc = MicroReader.readMicroXML (aIncludeRes, aSettings);
233             if (aIncludedDoc == null)
234             {
235               aErrorHandler.error (aResource, null, "Failed to parse include " + aIncludeRes, null);
236               return ESuccess.FAILURE;
237             }
238 
239             IMicroElement aIncludedContent;
240             if (sAnchor == null)
241             {
242               // no anchor present - include the whole document
243 
244               // Return the document element
245               aIncludedContent = aIncludedDoc.getDocumentElement ();
246             }
247             else
248             {
249               final String sFinalAnchor = sAnchor;
250               final Wrapper <IMicroElement> aMatch = new Wrapper <> ();
251               // Also include the root element in the search
252               ChildrenProviderHierarchyVisitor.visitFrom (aIncludedDoc.getDocumentElement (),
253                                                           new DefaultHierarchyVisitorCallback <IMicroNode> ()
254                                                           {
255                                                             @Override
256                                                             public EHierarchyVisitorReturn onItemBeforeChildren (final IMicroNode aItem)
257                                                             {
258                                                               if (aItem.isElement ())
259                                                               {
260                                                                 final IMicroElement aCurElement = (IMicroElement) aItem;
261                                                                 final String sID = aCurElement.getAttributeValue ("id");
262                                                                 if (sFinalAnchor.equals (sID))
263                                                                   aMatch.set (aCurElement);
264                                                               }
265                                                               return EHierarchyVisitorReturn.CONTINUE;
266                                                             }
267                                                           },
268                                                           true);
269               aIncludedContent = aMatch.get ();
270               if (aIncludedContent == null)
271               {
272                 aErrorHandler.warn (aResource,
273                                     null,
274                                     "Failed to resolve an element with the ID '" +
275                                           sAnchor +
276                                           "' in " +
277                                           aIncludeRes +
278                                           "! Therefore including the whole document!");
279                 aIncludedContent = aIncludedDoc.getDocumentElement ();
280               }
281             }
282 
283             // Important to detach from parent!
284             aIncludedContent.detachFromParent ();
285 
286             // Check for correct namespace URI of included content
287             if (!CSchematron.NAMESPACE_SCHEMATRON.equals (aIncludedContent.getNamespaceURI ()))
288             {
289               aErrorHandler.error (aResource,
290                                    null,
291                                    "The included resource " +
292                                          aIncludeRes +
293                                          " contains the wrong XML namespace URI '" +
294                                          aIncludedContent.getNamespaceURI () +
295                                          "' but was expected to have '" +
296                                          CSchematron.NAMESPACE_SCHEMATRON +
297                                          "'",
298                                    null);
299               return ESuccess.FAILURE;
300             }
301 
302             // Check that not a whole Schema but only a part is included
303             if (CSchematronXML.ELEMENT_SCHEMA.equals (aIncludedContent.getLocalName ()))
304             {
305               aErrorHandler.warn (aResource,
306                                   null,
307                                   "The included resource " +
308                                         aIncludeRes +
309                                         " seems to be a complete schema. To includes parts of a schema the respective element must be the root element of the included resource.");
310             }
311 
312             // Recursive resolve includes
313             if (_recursiveResolveAllSchematronIncludes (aIncludedContent,
314                                                         aIncludeRes,
315                                                         aSettings,
316                                                         aErrorHandler).isFailure ())
317               return ESuccess.FAILURE;
318 
319             // Now replace "include" element with content in MicroDOM
320             aElement.getParent ().replaceChild (aElement, aIncludedContent);
321           }
322           catch (final IOException ex)
323           {
324             aErrorHandler.error (aResource, null, "Failed to read include '" + sHref + "'", ex);
325             return ESuccess.FAILURE;
326           }
327         }
328     }
329     return ESuccess.SUCCESS;
330   }
331 
332   /**
333    * Resolve all Schematron includes of the passed resource.
334    *
335    * @param aResource
336    *        The Schematron resource to read. May not be <code>null</code>.
337    * @return <code>null</code> if the passed resource could not be read as XML
338    *         document
339    */
340   @Nullable
341   public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource)
342   {
343     return getWithResolvedSchematronIncludes (aResource, (ISAXReaderSettings) null, new LoggingPSErrorHandler ());
344   }
345 
346   /**
347    * Resolve all Schematron includes of the passed resource.
348    *
349    * @param aResource
350    *        The Schematron resource to read. May not be <code>null</code>.
351    * @param aSettings
352    *        The SAX reader settings to be used. May be <code>null</code> to use
353    *        the default settings.
354    * @param aErrorHandler
355    *        The error handler to be used. May not be <code>null</code>.
356    * @return <code>null</code> if the passed resource could not be read as XML
357    *         document
358    */
359   @Nullable
360   public static IMicroDocument getWithResolvedSchematronIncludes (@Nonnull final IReadableResource aResource,
361                                                                   @Nullable final ISAXReaderSettings aSettings,
362                                                                   @Nonnull final IPSErrorHandler aErrorHandler)
363   {
364     final InputSource aIS = InputSourceFactory.create (aResource);
365     final IMicroDocument aDoc = MicroReader.readMicroXML (aIS, aSettings);
366     if (aDoc != null)
367     {
368       // Resolve all Schematron includes
369       if (_recursiveResolveAllSchematronIncludes (aDoc.getDocumentElement (),
370                                                   aResource,
371                                                   aSettings,
372                                                   aErrorHandler).isFailure ())
373       {
374         // Error resolving includes
375         return null;
376       }
377     }
378     return aDoc;
379   }
380 }